In [1]:
#!pip3 install ray torch torchvision tabulate tensorboard
#!pip3 install 'ray[rllib]'
#!pip3 install ray

In [2]:
import warnings
warnings.filterwarnings('ignore')

import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

from models import VisualEncoder
from train import *

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib custom Torch model: a shared visual encoder with policy and value heads.

    The encoder maps an image observation to a ``features_dim``-sized feature
    vector. Two linear heads on top of it produce the action logits (one per
    discrete action, ``action_space.n``) and a scalar state-value estimate.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and both heads.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; must expose ``.n`` (number of actions).
            num_outputs: Number of model outputs, forwarded to ``TorchModelV2``.
            model_config: Model config dict, forwarded to ``TorchModelV2``.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder(features_dim)
        # Loading of pretrained encoder weights is currently disabled:
        #self.encoder.load_state_dict(
        #    torch.load("Visual Autoencoder weights and models/encoder_weigths.pth", map_location=torch.device('cpu'))
        #)
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate cached by the most recent forward() call.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            # Moves every registered submodule (encoder and both heads) at once.
            self.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: Dict whose ``'obs'`` entry is a 4-D tensor; it is
                permuted with ``(0, 3, 1, 2)`` (presumably channels-last
                images converted to channels-first -- TODO confirm).
            state: Recurrent state; returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving the
        # tensor in place, so the result must be used. Target the device the
        # parameters actually live on (not the use_cuda flag) so this stays
        # correct even if the model was relocated after construction.
        device = next(self.parameters()).device
        obs = input_dict['obs'].to(device)
        # Scale by 1/255 (presumably 8-bit pixel values -> [0, 1]).
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        # Drop the trailing size-1 dimension produced by the value head.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward()`` call.

        Raises:
            AssertionError: If ``forward()`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model class under the string name that a trainer config's
# "custom_model" entry can refer to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Select the GPU for this process: enumerate devices in PCI bus order and
# expose only device index 0.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Create the wrapped IGLU environment.

    Builds ``IGLUSilentBuilder-v0``, restricts it to a task preset, then wraps
    it with ``PovOnlyWrapper`` and ``IgluActionWrapper`` (in that order).

    Args:
        env_config: Optional dict-like environment config. Recognised keys:
            ``"max_steps"`` (episode step limit, default ``1000``) and
            ``"task_preset"`` (a task id or a list of task ids, default
            ``["C22"]``). ``None`` or an empty mapping gives the defaults.

    Returns:
        The fully wrapped environment.
    """
    env_config = env_config or {}
    max_steps = env_config.get("max_steps", 1000)
    task_preset = env_config.get("task_preset", ["C22"])
    # Accept a single task id as a convenience; list("C22") would split it
    # into characters.
    if isinstance(task_preset, str):
        task_preset = [task_preset]
    else:
        task_preset = list(task_preset)

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register the environment factory under the name "my_env" so that a trainer
# config can select it by that string.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Network settings: select the custom model by its registered name; no extra
# keyword arguments are passed to its constructor.
model_settings = {
    "custom_model": "my_torch_model",
    "custom_model_config": {},
}

# Weights & Biases run metadata (presumably read by the WandbLogger below).
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C22 not pretrained",
}

# Full PPO trainer configuration.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Disabled alternative to the loggers argument: callbacks=[CustomLoggerCallback()]
tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])

Trial name,status,loc
PPO_my_env_8f2a6_00000,PENDING,


2021-09-16 11:16:37,912	INFO wandb.py:170 -- Already logged into W&B.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=16561)[0m 2021-09-16 11:16:43,997	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=16561)[0m 2021-09-16 11:16:43,997	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=16561)[0m 2021-09-16 11:16:53,758	INFO trainable.py:109 -- Trainable.setup took 14.352 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 7998
  custom_metrics: {}
  date: 2021-09-16_11-19-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.16666666666666666
  episode_reward_min: 0.0
  episodes_this_iter: 6
  episodes_total: 6
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 5.0000000000000016e-05
          entropy: 2.8881016077533843
          entropy_coeff: 0.01
          kl: 0.002688916184766008
          policy_loss: -0.005954116642955811
          total_loss: -0.029144969095490993
          vf_explained_var: -0.8339848518371582
          vf_loss: 0.005152379287163043
    num_agent_steps_sampled: 7998
    num_agent_steps_trained: 7998
    num_steps_sampled: 7998
    num_steps_trained: 7998
  iterations_since_restore: 1
  node_ip: 192.168.1.96
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,1,184.435,7998,0.166667,1,0,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 15996
  custom_metrics: {}
  date: 2021-09-16_11-21-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 15
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.8810511558286604
          entropy_coeff: 0.01
          kl: 0.0069766820947797245
          policy_loss: -0.018501796059921305
          total_loss: 0.01548046025157135
          vf_explained_var: -0.6140614748001099
          vf_loss: 0.06209509945197218
    num_agent_steps_sampled: 15996
    num_agent_steps_trained: 15996
    num_steps_sampled: 15996
    num_steps_trained: 15996
  iterations_since_restore: 2
  node_ip: 192.168.1.96
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,2,248.368,15996,0,1,-2,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 23994
  custom_metrics: {}
  date: 2021-09-16_11-22-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.5238095238095238
  episode_reward_min: -3.0
  episodes_this_iter: 6
  episodes_total: 21
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.866032196629432
          entropy_coeff: 0.01
          kl: 0.007494281796975792
          policy_loss: -0.01528315903038107
          total_loss: 0.13063358450008014
          vf_explained_var: -0.00601164810359478
          vf_loss: 0.17382763715511593
    num_agent_steps_sampled: 23994
    num_agent_steps_trained: 23994
    num_steps_sampled: 23994
    num_steps_trained: 23994
  iterations_since_restore: 3
  node_ip: 192.168.1.96
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,3,313.148,23994,-0.52381,1,-3,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 31992
  custom_metrics: {}
  date: 2021-09-16_11-23-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -1.1333333333333333
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 30
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.8383094051832796
          entropy_coeff: 0.01
          kl: 0.00997770332835937
          policy_loss: -0.029522087469056088
          total_loss: 0.28025808563275684
          vf_explained_var: 0.2117086797952652
          vf_loss: 0.3371654956130892
    num_agent_steps_sampled: 31992
    num_agent_steps_trained: 31992
    num_steps_sampled: 31992
    num_steps_trained: 31992
  iterations_since_restore: 4
  node_ip: 192.168.1.96
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,4,381.694,31992,-1.13333,1,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 39990
  custom_metrics: {}
  date: 2021-09-16_11-24-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -1.4102564102564104
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 39
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.826024647169216
          entropy_coeff: 0.01
          kl: 0.008174105763742681
          policy_loss: -0.03430906725666856
          total_loss: 0.16249907573945419
          vf_explained_var: 0.27097663283348083
          vf_loss: 0.2242509783955381
    num_agent_steps_sampled: 39990
    num_agent_steps_trained: 39990
    num_steps_sampled: 39990
    num_steps_trained: 39990
  iterations_since_restore: 5
  node_ip: 192.168.1.96
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,5,449.456,39990,-1.41026,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 47988
  custom_metrics: {}
  date: 2021-09-16_11-25-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -1.1777777777777778
  episode_reward_min: -9.0
  episodes_this_iter: 6
  episodes_total: 45
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.808658920052231
          entropy_coeff: 0.01
          kl: 0.008462480386543365
          policy_loss: -0.025535488747540983
          total_loss: 0.21682530496549862
          vf_explained_var: 0.11320728808641434
          vf_loss: 0.2696011338868649
    num_agent_steps_sampled: 47988
    num_agent_steps_trained: 47988
    num_steps_sampled: 47988
    num_steps_trained: 47988
  iterations_since_restore: 6
  node_ip: 192.168.1.96
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,6,515.567,47988,-1.17778,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 55986
  custom_metrics: {}
  date: 2021-09-16_11-26-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.8703703703703703
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 54
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.786359139155316
          entropy_coeff: 0.01
          kl: 0.009055078653998658
          policy_loss: -0.03028115157879168
          total_loss: 0.13331852041495343
          vf_explained_var: 0.2560194134712219
          vf_loss: 0.1905577555293798
    num_agent_steps_sampled: 55986
    num_agent_steps_trained: 55986
    num_steps_sampled: 55986
    num_steps_trained: 55986
  iterations_since_restore: 7
  node_ip: 192.168.1.96
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,7,592.898,55986,-0.87037,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 63984
  custom_metrics: {}
  date: 2021-09-16_11-27-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.6190476190476191
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 63
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.776591469651909
          entropy_coeff: 0.01
          kl: 0.009038241556711277
          policy_loss: -0.02004421950488161
          total_loss: 0.15451362164771204
          vf_explained_var: 0.08677450567483902
          vf_loss: 0.2014199320151801
    num_agent_steps_sampled: 63984
    num_agent_steps_trained: 63984
    num_steps_sampled: 63984
    num_steps_trained: 63984
  iterations_since_restore: 8
  node_ip: 192.168.1.96
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,8,658.05,63984,-0.619048,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 71982
  custom_metrics: {}
  date: 2021-09-16_11-28-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.5217391304347826
  episode_reward_min: -9.0
  episodes_this_iter: 6
  episodes_total: 69
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.738396813023475
          entropy_coeff: 0.01
          kl: 0.008848986293529993
          policy_loss: -0.019896211192732858
          total_loss: 0.12755512626721494
          vf_explained_var: 0.08280733972787857
          vf_loss: 0.17395040721254504
    num_agent_steps_sampled: 71982
    num_agent_steps_trained: 71982
    num_steps_sampled: 71982
    num_steps_trained: 71982
  iterations_since_restore: 9
  node_ip: 192.168.1.96
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,9,721.557,71982,-0.521739,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 79980
  custom_metrics: {}
  date: 2021-09-16_11-29-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.44871794871794873
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 78
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.722803311194143
          entropy_coeff: 0.01
          kl: 0.008820954381142056
          policy_loss: -0.032651735706034524
          total_loss: 0.13398446832553193
          vf_explained_var: 0.243268221616745
          vf_loss: 0.19298214290456306
    num_agent_steps_sampled: 79980
    num_agent_steps_trained: 79980
    num_steps_sampled: 79980
    num_steps_trained: 79980
  iterations_since_restore: 10
  node_ip: 192.168.1.96
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,10,783.98,79980,-0.448718,2,-9,1000


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 87978
  custom_metrics: {}
  date: 2021-09-16_11-30-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 87
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7351321648525935
          entropy_coeff: 0.01
          kl: 0.008737843671759003
          policy_loss: -0.033650567164013705
          total_loss: 0.06209255929120005
          vf_explained_var: 0.2675935626029968
          vf_loss: 0.12222066335880788
    num_agent_steps_sampled: 87978
    num_agent_steps_trained: 87978
    num_steps_sampled: 87978
    num_steps_trained: 87978
  iterations_since_restore: 11
  node_ip: 192.168.1.96
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,11,842.494,87978,-0.333333,4,-9,1000




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 95976
  custom_metrics: {}
  date: 2021-09-16_11-32-37
  done: false
  episode_len_mean: 996.3125
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.17708333333333334
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 96
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7290013469675536
          entropy_coeff: 0.01
          kl: 0.00932654607253701
          policy_loss: -0.025135908913748558
          total_loss: 0.11684737002416964
          vf_explained_var: 0.18681135773658752
          vf_loss: 0.16834063702262939
    num_agent_steps_sampled: 95976
    num_agent_steps_trained: 95976
    num_steps_sampled: 95976
    num_steps_trained: 95976
  iterations_since_restore: 12
  node_ip: 192.168.1.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,12,942.978,95976,-0.177083,4,-9,996.312


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 103974
  custom_metrics: {}
  date: 2021-09-16_11-33-56
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.08
  episode_reward_min: -9.0
  episodes_this_iter: 6
  episodes_total: 102
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7226495319797146
          entropy_coeff: 0.01
          kl: 0.009664144447838358
          policy_loss: -0.025963372660059762
          total_loss: 0.16033497497299184
          vf_explained_var: 0.15665513277053833
          vf_loss: 0.21255842908010167
    num_agent_steps_sampled: 103974
    num_agent_steps_trained: 103974
    num_steps_sampled: 103974
    num_steps_trained: 103974
  iterations_since_restore: 13
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,13,1021.96,103974,-0.08,4,-9,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 111972
  custom_metrics: {}
  date: 2021-09-16_11-35-14
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.02
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 111
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.710610889619397
          entropy_coeff: 0.01
          kl: 0.009550321380593096
          policy_loss: -0.035924607719124486
          total_loss: 0.06735979847369655
          vf_explained_var: 0.09175171703100204
          vf_loss: 0.12943548189428564
    num_agent_steps_sampled: 111972
    num_agent_steps_trained: 111972
    num_steps_sampled: 111972
    num_steps_trained: 111972
  iterations_since_restore: 14
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,14,1100.65,111972,-0.02,4,-9,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 119970
  custom_metrics: {}
  date: 2021-09-16_11-36-22
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.14
  episode_reward_min: -9.0
  episodes_this_iter: 9
  episodes_total: 120
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6952090247984857
          entropy_coeff: 0.01
          kl: 0.010876209339983057
          policy_loss: -0.04096266748383641
          total_loss: 0.12682537371192568
          vf_explained_var: 0.18071217834949493
          vf_loss: 0.1936525094042462
    num_agent_steps_sampled: 119970
    num_agent_steps_trained: 119970
    num_steps_sampled: 119970
    num_steps_trained: 119970
  iterations_since_restore: 15
  node_ip: 192.168.1.96
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,15,1168.54,119970,0.14,4,-9,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 127968
  custom_metrics: {}
  date: 2021-09-16_11-37-27
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.28
  episode_reward_min: -9.0
  episodes_this_iter: 6
  episodes_total: 126
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6792934102396813
          entropy_coeff: 0.01
          kl: 0.011083304086060893
          policy_loss: -0.03504054083298611
          total_loss: 0.07663222109217958
          vf_explained_var: 0.24995848536491394
          vf_loss: 0.13735736470697046
    num_agent_steps_sampled: 127968
    num_agent_steps_trained: 127968
    num_steps_sampled: 127968
    num_steps_trained: 127968
  iterations_since_restore: 16
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,16,1233.36,127968,0.28,4,-9,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 135966
  custom_metrics: {}
  date: 2021-09-16_11-38-30
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.65
  episode_reward_min: -5.0
  episodes_this_iter: 9
  episodes_total: 135
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6749989978728754
          entropy_coeff: 0.01
          kl: 0.010698700096124182
          policy_loss: -0.034340334133875945
          total_loss: 0.19364940883511658
          vf_explained_var: 0.23379212617874146
          vf_loss: 0.25366986276161285
    num_agent_steps_sampled: 135966
    num_agent_steps_trained: 135966
    num_steps_sampled: 135966
    num_steps_trained: 135966
  iterations_since_restore: 17
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,17,1295.54,135966,0.65,4,-5,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 143964
  custom_metrics: {}
  date: 2021-09-16_11-39-31
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.72
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 144
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6620880526881066
          entropy_coeff: 0.01
          kl: 0.011162646149588108
          policy_loss: -0.039232920260438994
          total_loss: 0.09086696411913602
          vf_explained_var: 0.1298505812883377
          vf_loss: 0.15560449967749157
    num_agent_steps_sampled: 143964
    num_agent_steps_trained: 143964
    num_steps_sampled: 143964
    num_steps_trained: 143964
  iterations_since_restore: 18
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,18,1357.12,143964,0.72,4,-3,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 151962
  custom_metrics: {}
  date: 2021-09-16_11-40-35
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.76
  episode_reward_min: -3.0
  episodes_this_iter: 6
  episodes_total: 150
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6688474650024085
          entropy_coeff: 0.01
          kl: 0.010640450947400695
          policy_loss: -0.03395673263197144
          total_loss: 0.15609483885108144
          vf_explained_var: 0.15117712318897247
          vf_loss: 0.215676001300134
    num_agent_steps_sampled: 151962
    num_agent_steps_trained: 151962
    num_steps_sampled: 151962
    num_steps_trained: 151962
  iterations_since_restore: 19
  node_ip: 192.168.1.96
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,19,1420.56,151962,0.76,4,-3,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 159960
  custom_metrics: {}
  date: 2021-09-16_11-41-39
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.79
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 159
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.646208614431402
          entropy_coeff: 0.01
          kl: 0.011744973155672047
          policy_loss: -0.031779992810740905
          total_loss: 0.2207532058780392
          vf_explained_var: 0.2052232176065445
          vf_loss: 0.2778207877821099
    num_agent_steps_sampled: 159960
    num_agent_steps_trained: 159960
    num_steps_sampled: 159960
    num_steps_trained: 159960
  iterations_since_restore: 20
  node_ip: 192.168.1.96
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,20,1485.35,159960,0.79,4,-3,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 167958
  custom_metrics: {}
  date: 2021-09-16_11-42-43
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.84
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 168
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.66153879832196
          entropy_coeff: 0.01
          kl: 0.01070887426503301
          policy_loss: -0.03491573014687146
          total_loss: 0.0739468068595455
          vf_explained_var: 0.10461995750665665
          vf_loss: 0.1344070377807203
    num_agent_steps_sampled: 167958
    num_agent_steps_trained: 167958
    num_steps_sampled: 167958
    num_steps_trained: 167958
  iterations_since_restore: 21
  node_ip: 192.168.1.96
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,21,1548.56,167958,0.84,4,-3,996.46


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 175956
  custom_metrics: {}
  date: 2021-09-16_11-43-46
  done: false
  episode_len_mean: 996.46
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.91
  episode_reward_min: -3.0
  episodes_this_iter: 6
  episodes_total: 174
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.640846380623438
          entropy_coeff: 0.01
          kl: 0.0112574302182743
          policy_loss: -0.03175792544279047
          total_loss: 0.02230186160673858
          vf_explained_var: 0.15102416276931763
          vf_loss: 0.07934250728989531
    num_agent_steps_sampled: 175956
    num_agent_steps_trained: 175956
    num_steps_sampled: 175956
    num_steps_trained: 175956
  iterations_since_restore: 22
  node_ip: 192.168.1.96
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,22,1612.04,175956,0.91,4,-3,996.46




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 183954
  custom_metrics: {}
  date: 2021-09-16_11-45-13
  done: false
  episode_len_mean: 991.67
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.99
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 183
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5951980365219938
          entropy_coeff: 0.01
          kl: 0.012253395637303693
          policy_loss: -0.03897907147384299
          total_loss: 0.018147120102539018
          vf_explained_var: 0.2999287545681
          vf_loss: 0.08185283208531277
    num_agent_steps_sampled: 183954
    num_agent_steps_trained: 183954
    num_steps_sampled: 183954
    num_steps_trained: 183954
  iterations_since_restore: 23
  node_ip: 192.168.1.96
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,23,1698.33,183954,0.99,4,-3,991.67


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 191952
  custom_metrics: {}
  date: 2021-09-16_11-46-16
  done: false
  episode_len_mean: 994.04
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.05
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 192
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.59551025846953
          entropy_coeff: 0.01
          kl: 0.012242155842591966
          policy_loss: -0.04673371776858325
          total_loss: 0.04530597931845615
          vf_explained_var: 0.2936638295650482
          vf_loss: 0.1167705833933176
    num_agent_steps_sampled: 191952
    num_agent_steps_trained: 191952
    num_steps_sampled: 191952
    num_steps_trained: 191952
  iterations_since_restore: 24
  node_ip: 192.168.1.96
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,24,1761.68,191952,1.05,4,-2,994.04


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 199950
  custom_metrics: {}
  date: 2021-09-16_11-47-18
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.03
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 198
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.621447112739727
          entropy_coeff: 0.01
          kl: 0.012827699700947902
          policy_loss: -0.040775396425517335
          total_loss: 0.032160328888905146
          vf_explained_var: 0.19416747987270355
          vf_loss: 0.09786742551913184
    num_agent_steps_sampled: 199950
    num_agent_steps_trained: 199950
    num_steps_sampled: 199950
    num_steps_trained: 199950
  iterations_since_restore: 25
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,25,1823.41,199950,1.03,4,-2,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 207948
  custom_metrics: {}
  date: 2021-09-16_11-48-20
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.03
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 207
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.595196802641756
          entropy_coeff: 0.01
          kl: 0.013876440128516864
          policy_loss: -0.04460880148414803
          total_loss: 0.019621311918261553
          vf_explained_var: 0.21758291125297546
          vf_loss: 0.08879443628166461
    num_agent_steps_sampled: 207948
    num_agent_steps_trained: 207948
    num_steps_sampled: 207948
    num_steps_trained: 207948
  iterations_since_restore: 26
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,26,1885.2,207948,1.03,4,-2,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 215946
  custom_metrics: {}
  date: 2021-09-16_11-49-22
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.1
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 216
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.574108123266569
          entropy_coeff: 0.01
          kl: 0.01414900815840626
          policy_loss: -0.03546152598955619
          total_loss: 0.0385273184376939
          vf_explained_var: 0.12288613617420197
          vf_loss: 0.09831502440320428
    num_agent_steps_sampled: 215946
    num_agent_steps_trained: 215946
    num_steps_sampled: 215946
    num_steps_trained: 215946
  iterations_since_restore: 27
  node_ip: 192.168.1.96
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,27,1947.33,215946,1.1,4,-2,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 223944
  custom_metrics: {}
  date: 2021-09-16_11-50-23
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.12
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 222
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.557285665953031
          entropy_coeff: 0.01
          kl: 0.011574813922962923
          policy_loss: -0.03287142490797866
          total_loss: 0.016256824955945052
          vf_explained_var: 0.13122741878032684
          vf_loss: 0.07354362520128889
    num_agent_steps_sampled: 223944
    num_agent_steps_trained: 223944
    num_steps_sampled: 223944
    num_steps_trained: 223944
  iterations_since_restore: 28
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,28,2008.62,223944,1.12,4,-1,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 231942
  custom_metrics: {}
  date: 2021-09-16_11-51-25
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.13
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 231
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5339522018227525
          entropy_coeff: 0.01
          kl: 0.014663932680889514
          policy_loss: -0.04794177598450133
          total_loss: -0.016420470237211195
          vf_explained_var: 0.35908690094947815
          vf_loss: 0.055394434041508865
    num_agent_steps_sampled: 231942
    num_agent_steps_trained: 231942
    num_steps_sampled: 231942
    num_steps_trained: 231942
  iterations_since_restore: 29
  node_ip: 192.168.1.96
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,29,2070.17,231942,1.13,4,-1,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 239940
  custom_metrics: {}
  date: 2021-09-16_11-52-26
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.24
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 240
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.586463124521317
          entropy_coeff: 0.01
          kl: 0.01366295135804194
          policy_loss: -0.04687954148337726
          total_loss: -0.003363801510904425
          vf_explained_var: 0.16269417107105255
          vf_loss: 0.0680140752517443
    num_agent_steps_sampled: 239940
    num_agent_steps_trained: 239940
    num_steps_sampled: 239940
    num_steps_trained: 239940
  iterations_since_restore: 30
  node_ip: 192.168.1.96
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,30,2131.07,239940,1.24,4,-1,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 247938
  custom_metrics: {}
  date: 2021-09-16_11-53-26
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.3
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 246
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5571753712110623
          entropy_coeff: 0.01
          kl: 0.013447373668381349
          policy_loss: -0.041440258107037956
          total_loss: -0.0074527545271301145
          vf_explained_var: 0.13404233753681183
          vf_loss: 0.058214519909014724
    num_agent_steps_sampled: 247938
    num_agent_steps_trained: 247938
    num_steps_sampled: 247938
    num_steps_trained: 247938
  iterations_since_restore: 31
  node_ip: 192.168.1.96
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,31,2191.46,247938,1.3,4,-1,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 255936
  custom_metrics: {}
  date: 2021-09-16_11-54-26
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.4
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 255
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5551416215076244
          entropy_coeff: 0.01
          kl: 0.014135348108204689
          policy_loss: -0.04820424225181341
          total_loss: -0.00684582963786138
          vf_explained_var: 0.32279059290885925
          vf_loss: 0.06549629360088147
    num_agent_steps_sampled: 255936
    num_agent_steps_trained: 255936
    num_steps_sampled: 255936
    num_steps_trained: 255936
  iterations_since_restore: 32
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,32,2251.4,255936,1.4,4,-1,995.21


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 263934
  custom_metrics: {}
  date: 2021-09-16_11-55-26
  done: false
  episode_len_mean: 995.21
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.38
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 264
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.556210273568348
          entropy_coeff: 0.01
          kl: 0.014527659315608286
          policy_loss: -0.036297927643623086
          total_loss: -0.00791272993364762
          vf_explained_var: 0.1879570186138153
          vf_loss: 0.052494532938781244
    num_agent_steps_sampled: 263934
    num_agent_steps_trained: 263934
    num_steps_sampled: 263934
    num_steps_trained: 263934
  iterations_since_restore: 33
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,33,2310.97,263934,1.38,4,-1,995.21




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 271932
  custom_metrics: {}
  date: 2021-09-16_11-56-44
  done: false
  episode_len_mean: 990.96
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.37
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 273
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.57078369176516
          entropy_coeff: 0.01
          kl: 0.013929636927755258
          policy_loss: -0.04249770745700125
          total_loss: -0.00852261502996728
          vf_explained_var: 0.2412363737821579
          vf_loss: 0.0582899660440988
    num_agent_steps_sampled: 271932
    num_agent_steps_trained: 271932
    num_steps_sampled: 271932
    num_steps_trained: 271932
  iterations_since_restore: 34
  node_ip: 192.168.1.96
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,34,2389.41,271932,1.37,4,-1,990.96


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 279930
  custom_metrics: {}
  date: 2021-09-16_11-57-44
  done: false
  episode_len_mean: 993.82
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.4
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 279
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5655304588297363
          entropy_coeff: 0.01
          kl: 0.01430842027414017
          policy_loss: -0.04571282756865345
          total_loss: -0.02984971114764771
          vf_explained_var: 0.03619702532887459
          vf_loss: 0.04008757855852074
    num_agent_steps_sampled: 279930
    num_agent_steps_trained: 279930
    num_steps_sampled: 279930
    num_steps_trained: 279930
  iterations_since_restore: 35
  node_ip: 192.168.1.96
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,35,2448.81,279930,1.4,4,-1,993.82


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 287928
  custom_metrics: {}
  date: 2021-09-16_11-58-44
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.4
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 288
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5964004716565534
          entropy_coeff: 0.01
          kl: 0.012853221862939142
          policy_loss: -0.03980504870615018
          total_loss: -0.02366230064622497
          vf_explained_var: -0.09410770982503891
          vf_loss: 0.040821431010119084
    num_agent_steps_sampled: 287928
    num_agent_steps_trained: 287928
    num_steps_sampled: 287928
    num_steps_trained: 287928
  iterations_since_restore: 36
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,36,2509.27,287928,1.4,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 295926
  custom_metrics: {}
  date: 2021-09-16_11-59-44
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.41
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 297
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5679651652612994
          entropy_coeff: 0.01
          kl: 0.01624336108746099
          policy_loss: -0.03983656619224817
          total_loss: -0.02991190986627693
          vf_explained_var: 0.09360939264297485
          vf_loss: 0.033979971643902804
    num_agent_steps_sampled: 295926
    num_agent_steps_trained: 295926
    num_steps_sampled: 295926
    num_steps_trained: 295926
  iterations_since_restore: 37
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,37,2568.94,295926,1.41,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 303924
  custom_metrics: {}
  date: 2021-09-16_12-00-42
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.38
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 303
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5498534576867216
          entropy_coeff: 0.01
          kl: 0.015431217330787875
          policy_loss: -0.04313082046085789
          total_loss: -0.029662701826021876
          vf_explained_var: 0.07323203235864639
          vf_loss: 0.03742352997983045
    num_agent_steps_sampled: 303924
    num_agent_steps_trained: 303924
    num_steps_sampled: 303924
    num_steps_trained: 303924
  iterations_since_restore: 38
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,38,2627.58,303924,1.38,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 311922
  custom_metrics: {}
  date: 2021-09-16_12-01-41
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.39
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 312
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.535438863436381
          entropy_coeff: 0.01
          kl: 0.015114705404629974
          policy_loss: -0.04348464724157126
          total_loss: -0.02977239047527634
          vf_explained_var: 0.18281039595603943
          vf_loss: 0.03755517336859127
    num_agent_steps_sampled: 311922
    num_agent_steps_trained: 311922
    num_steps_sampled: 311922
    num_steps_trained: 311922
  iterations_since_restore: 39
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,39,2686,311922,1.39,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 319920
  custom_metrics: {}
  date: 2021-09-16_12-02-41
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.45
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 321
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.469206952792342
          entropy_coeff: 0.01
          kl: 0.015194439552444459
          policy_loss: -0.049896796860842293
          total_loss: -0.020444601712127527
          vf_explained_var: 0.1486934870481491
          vf_loss: 0.05262482107212637
    num_agent_steps_sampled: 319920
    num_agent_steps_trained: 319920
    num_steps_sampled: 319920
    num_steps_trained: 319920
  iterations_since_restore: 40
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,40,2745.76,319920,1.45,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 327918
  custom_metrics: {}
  date: 2021-09-16_12-03-40
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.47
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 327
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.478336580850745
          entropy_coeff: 0.01
          kl: 0.015472642185064984
          policy_loss: -0.045682557535568075
          total_loss: -0.018550577046229474
          vf_explained_var: 0.03487660363316536
          vf_loss: 0.05036808071296252
    num_agent_steps_sampled: 327918
    num_agent_steps_trained: 327918
    num_steps_sampled: 327918
    num_steps_trained: 327918
  iterations_since_restore: 41
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,41,2804.82,327918,1.47,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 335916
  custom_metrics: {}
  date: 2021-09-16_12-04-40
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.5
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 336
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4028151227581884
          entropy_coeff: 0.01
          kl: 0.018059452780608082
          policy_loss: -0.04442179935073019
          total_loss: -0.007923677916167885
          vf_explained_var: 0.011908459477126598
          vf_loss: 0.05872032697693074
    num_agent_steps_sampled: 335916
    num_agent_steps_trained: 335916
    num_steps_sampled: 335916
    num_steps_trained: 335916
  iterations_since_restore: 42
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,42,2865.05,335916,1.5,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 343914
  custom_metrics: {}
  date: 2021-09-16_12-05-39
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.5
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 345
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4054943987118302
          entropy_coeff: 0.01
          kl: 0.017019757657668454
          policy_loss: -0.04306054337410837
          total_loss: -0.0306440551534936
          vf_explained_var: 0.08500026911497116
          vf_loss: 0.03476945524446927
    num_agent_steps_sampled: 343914
    num_agent_steps_trained: 343914
    num_steps_sampled: 343914
    num_steps_trained: 343914
  iterations_since_restore: 43
  node_ip: 192.168.1.96
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,43,2923.8,343914,1.5,4,-2,995.75


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 351912
  custom_metrics: {}
  date: 2021-09-16_12-06-39
  done: false
  episode_len_mean: 995.75
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.45
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 351
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3985872307131366
          entropy_coeff: 0.01
          kl: 0.015898659137850876
          policy_loss: -0.046927300263797087
          total_loss: 0.0056824869776685395
          vf_explained_var: 0.028007186949253082
          vf_loss: 0.07500579303014092
    num_agent_steps_sampled: 351912
    num_agent_steps_trained: 351912
    num_steps_sampled: 351912
    num_steps_trained: 351912
  iterations_since_restore: 44
  node_ip: 192.168.1.96
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,44,2983.51,351912,1.45,4,-2,995.75




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 359910
  custom_metrics: {}
  date: 2021-09-16_12-07-56
  done: false
  episode_len_mean: 991.18
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.39
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 360
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 2.317290501056179
          entropy_coeff: 0.01
          kl: 0.020316794588724547
          policy_loss: -0.047727202393755476
          total_loss: -0.008768871803117055
          vf_explained_var: 0.09213171154260635
          vf_loss: 0.060099555632182125
    num_agent_steps_sampled: 359910
    num_agent_steps_trained: 359910
    num_steps_sampled: 359910
    num_steps_trained: 359910
  iterations_since_restore: 45
  node_ip: 192.168.1.96
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,45,3061.23,359910,1.39,4,-2,991.18


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 367908
  custom_metrics: {}
  date: 2021-09-16_12-08-57
  done: false
  episode_len_mean: 993.78
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.37
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 369
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2200308074233353
          entropy_coeff: 0.01
          kl: 0.0166361897862409
          policy_loss: -0.039967240031147676
          total_loss: 0.0035969251456360023
          vf_explained_var: 0.21184197068214417
          vf_loss: 0.06326904366699157
    num_agent_steps_sampled: 367908
    num_agent_steps_trained: 367908
    num_steps_sampled: 367908
    num_steps_trained: 367908
  iterations_since_restore: 46
  node_ip: 192.168.1.96
  num_healthy_workers: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,46,3122.32,367908,1.37,4,-2,993.78


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 375906
  custom_metrics: {}
  date: 2021-09-16_12-09-55
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.38
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 375
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3515758587468056
          entropy_coeff: 0.01
          kl: 0.015002638220077068
          policy_loss: -0.04610261258829425
          total_loss: -0.01582119806559496
          vf_explained_var: 0.10789804905653
          vf_loss: 0.05154677653479921
    num_agent_steps_sampled: 375906
    num_agent_steps_trained: 375906
    num_steps_sampled: 375906
    num_steps_trained: 375906
  iterations_since_restore: 47
  node_ip: 192.168.1.96
  num_healthy_workers: 3
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,47,3179.54,375906,1.38,4,-2,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 383904
  custom_metrics: {}
  date: 2021-09-16_12-10-55
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.37
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 384
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4728973716817877
          entropy_coeff: 0.01
          kl: 0.014947996793216452
          policy_loss: -0.030306653223020494
          total_loss: 0.013687725159870361
          vf_explained_var: -0.06941216439008713
          vf_loss: 0.06648115263187818
    num_agent_steps_sampled: 383904
    num_agent_steps_trained: 383904
    num_steps_sampled: 383904
    num_steps_trained: 383904
  iterations_since_restore: 48
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,48,3239.4,383904,1.37,4,-2,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 391902
  custom_metrics: {}
  date: 2021-09-16_12-12-05
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.4
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 393
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.483915621747253
          entropy_coeff: 0.01
          kl: 0.01447686321821552
          policy_loss: -0.04623801046982408
          total_loss: -0.0266540356421022
          vf_explained_var: 0.31743085384368896
          vf_loss: 0.042251601633590756
    num_agent_steps_sampled: 391902
    num_agent_steps_trained: 391902
    num_steps_sampled: 391902
    num_steps_trained: 391902
  iterations_since_restore: 49
  node_ip: 192.168.1.96
  num_healthy_workers: 3
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,49,3309.79,391902,1.4,4,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 399900
  custom_metrics: {}
  date: 2021-09-16_12-13-16
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.41
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 399
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4389907370331465
          entropy_coeff: 0.01
          kl: 0.01508856158915252
          policy_loss: -0.05813720378704289
          total_loss: -0.03912457913881348
          vf_explained_var: 0.12385588884353638
          vf_loss: 0.04113924685779268
    num_agent_steps_sampled: 399900
    num_agent_steps_trained: 399900
    num_steps_sampled: 399900
    num_steps_trained: 399900
  iterations_since_restore: 50
  node_ip: 192.168.1.96
  num_healthy_workers: 3


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,50,3380.39,399900,1.41,4,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 407898
  custom_metrics: {}
  date: 2021-09-16_12-14-27
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.44
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 408
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3423865784880937
          entropy_coeff: 0.01
          kl: 0.014810716685475898
          policy_loss: -0.040200014506536784
          total_loss: 0.019543579844657773
          vf_explained_var: 0.16518180072307587
          vf_loss: 0.08094585226511994
    num_agent_steps_sampled: 407898
    num_agent_steps_trained: 407898
    num_steps_sampled: 407898
    num_steps_trained: 407898
  iterations_since_restore: 51
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,51,3452.07,407898,1.44,4,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 415896
  custom_metrics: {}
  date: 2021-09-16_12-15-39
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.36
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 417
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3381462324050166
          entropy_coeff: 0.01
          kl: 0.017254841999020464
          policy_loss: -0.04963656614524543
          total_loss: -0.011000218552847702
          vf_explained_var: 0.1370316445827484
          vf_loss: 0.05942958266416725
    num_agent_steps_sampled: 415896
    num_agent_steps_trained: 415896
    num_steps_sampled: 415896
    num_steps_trained: 415896
  iterations_since_restore: 52
  node_ip: 192.168.1.96
  num_healthy_workers: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,52,3523.4,415896,1.36,2,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 423894
  custom_metrics: {}
  date: 2021-09-16_12-16-51
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.33
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 423
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3226546661828156
          entropy_coeff: 0.01
          kl: 0.016866094742635274
          policy_loss: -0.048464063356720634
          total_loss: -0.0043752826840406465
          vf_explained_var: -0.013932964764535427
          vf_loss: 0.06478541356721712
    num_agent_steps_sampled: 423894
    num_agent_steps_trained: 423894
    num_steps_sampled: 423894
    num_steps_trained: 423894
  iterations_since_restore: 53
  node_ip: 192.168.1.96
  num_healthy_worke

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,53,3596.05,423894,1.33,2,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 431892
  custom_metrics: {}
  date: 2021-09-16_12-18-03
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.37
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 432
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3651265184084576
          entropy_coeff: 0.01
          kl: 0.01601845098784543
          policy_loss: -0.04137111582883423
          total_loss: -0.0150731208448809
          vf_explained_var: 0.16249212622642517
          vf_loss: 0.04754649207674427
    num_agent_steps_sampled: 431892
    num_agent_steps_trained: 431892
    num_steps_sampled: 431892
    num_steps_trained: 431892
  iterations_since_restore: 54
  node_ip: 192.168.1.96
  num_healthy_workers: 3
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,54,3667.3,431892,1.37,2,-1,995.43


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 439890
  custom_metrics: {}
  date: 2021-09-16_12-19-14
  done: false
  episode_len_mean: 995.43
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.37
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 441
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3275750525536076
          entropy_coeff: 0.01
          kl: 0.015783882830079497
          policy_loss: -0.043611852996932564
          total_loss: 0.005765580336853701
          vf_explained_var: -0.05139797553420067
          vf_loss: 0.07028560081236465
    num_agent_steps_sampled: 439890
    num_agent_steps_trained: 439890
    num_steps_sampled: 439890
    num_steps_trained: 439890
  iterations_since_restore: 55
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,55,3738.73,439890,1.37,2,-1,995.43




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 447888
  custom_metrics: {}
  date: 2021-09-16_12-20-43
  done: false
  episode_len_mean: 992.36
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.38
  episode_reward_min: -1.0
  episodes_this_iter: 8
  episodes_total: 449
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.300777542462913
          entropy_coeff: 0.01
          kl: 0.017960692248036154
          policy_loss: -0.0423527045643598
          total_loss: 0.01538607582089401
          vf_explained_var: 0.11563196778297424
          vf_loss: 0.07805245231564147
    num_agent_steps_sampled: 447888
    num_agent_steps_trained: 447888
    num_steps_sampled: 447888
    num_steps_trained: 447888
  iterations_since_restore: 56
  node_ip: 192.168.1.96
  num_healthy_workers: 3
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,56,3827.69,447888,1.38,2,-1,992.36




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 455886
  custom_metrics: {}
  date: 2021-09-16_12-22-13
  done: false
  episode_len_mean: 992.7
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.46
  episode_reward_min: -1.0
  episodes_this_iter: 7
  episodes_total: 456
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.395124021140478
          entropy_coeff: 0.01
          kl: 0.015322126012734945
          policy_loss: -0.04059576728851885
          total_loss: -0.006573879791884333
          vf_explained_var: -0.12497514486312866
          vf_loss: 0.055674807234887554
    num_agent_steps_sampled: 455886
    num_agent_steps_trained: 455886
    num_steps_sampled: 455886
    num_steps_trained: 455886
  iterations_since_restore: 57
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,57,3917.28,455886,1.46,2,-1,992.7


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 463884
  custom_metrics: {}
  date: 2021-09-16_12-23-24
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.5
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 465
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.429405577080224
          entropy_coeff: 0.01
          kl: 0.014935927896333216
          policy_loss: -0.03980515303005094
          total_loss: -0.0014811469677595362
          vf_explained_var: -0.07066130638122559
          vf_loss: 0.060377672573323715
    num_agent_steps_sampled: 463884
    num_agent_steps_trained: 463884
    num_steps_sampled: 463884
    num_steps_trained: 463884
  iterations_since_restore: 58
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,58,3988.67,463884,1.5,2,-1,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 471882
  custom_metrics: {}
  date: 2021-09-16_12-24-39
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.56
  episode_reward_min: -1.0
  episodes_this_iter: 7
  episodes_total: 472
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5071597596650483
          entropy_coeff: 0.01
          kl: 0.012621764841228348
          policy_loss: -0.04837222546063644
          total_loss: -0.046377431991339854
          vf_explained_var: -0.366502583026886
          vf_loss: 0.025173126083488265
    num_agent_steps_sampled: 471882
    num_agent_steps_trained: 471882
    num_steps_sampled: 471882
    num_steps_trained: 471882
  iterations_since_restore: 59
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,59,4063.69,471882,1.56,2,-1,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 479880
  custom_metrics: {}
  date: 2021-09-16_12-25-55
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.55
  episode_reward_min: -1.0
  episodes_this_iter: 8
  episodes_total: 480
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.487966963809024
          entropy_coeff: 0.01
          kl: 0.014439600226899996
          policy_loss: -0.038984028814781094
          total_loss: -0.014582512888216203
          vf_explained_var: -0.39268097281455994
          vf_loss: 0.04711524445437751
    num_agent_steps_sampled: 479880
    num_agent_steps_trained: 479880
    num_steps_sampled: 479880
    num_steps_trained: 479880
  iterations_since_restore: 60
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,60,4139.48,479880,1.55,2,-1,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 487878
  custom_metrics: {}
  date: 2021-09-16_12-27-13
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.58
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 489
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.326210919118697
          entropy_coeff: 0.01
          kl: 0.015902394768551455
          policy_loss: -0.03935419794332276
          total_loss: 0.0035247101638746516
          vf_explained_var: 0.018999241292476654
          vf_loss: 0.06375565777100267
    num_agent_steps_sampled: 487878
    num_agent_steps_trained: 487878
    num_steps_sampled: 487878
    num_steps_trained: 487878
  iterations_since_restore: 61
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,61,4217.39,487878,1.58,2,-1,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 495876
  custom_metrics: {}
  date: 2021-09-16_12-28-27
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.6
  episode_reward_min: -1.0
  episodes_this_iter: 7
  episodes_total: 496
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2660551858204667
          entropy_coeff: 0.01
          kl: 0.016928274823445565
          policy_loss: -0.042780560857906776
          total_loss: -0.010450509761369997
          vf_explained_var: 0.011205574497580528
          vf_loss: 0.05245136146570408
    num_agent_steps_sampled: 495876
    num_agent_steps_trained: 495876
    num_steps_sampled: 495876
    num_steps_trained: 495876
  iterations_since_restore: 62
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,62,4291.26,495876,1.6,2,-1,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 503874
  custom_metrics: {}
  date: 2021-09-16_12-29-44
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.56
  episode_reward_min: -2.0
  episodes_this_iter: 8
  episodes_total: 504
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.34460996197116
          entropy_coeff: 0.01
          kl: 0.014290694592042694
          policy_loss: -0.039172701331316145
          total_loss: -0.012599768513633359
          vf_explained_var: -0.08215177059173584
          vf_loss: 0.04787542767086071
    num_agent_steps_sampled: 503874
    num_agent_steps_trained: 503874
    num_steps_sampled: 503874
    num_steps_trained: 503874
  iterations_since_restore: 63
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,63,4368.36,503874,1.56,2,-2,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 511872
  custom_metrics: {}
  date: 2021-09-16_12-31-00
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.57
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 513
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3353955312441754
          entropy_coeff: 0.01
          kl: 0.014673658516890006
          policy_loss: -0.04010880892165005
          total_loss: -0.005891389270583468
          vf_explained_var: -0.03649448975920677
          vf_loss: 0.05537032527686034
    num_agent_steps_sampled: 511872
    num_agent_steps_trained: 511872
    num_steps_sampled: 511872
    num_steps_trained: 511872
  iterations_since_restore: 64
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,64,4444.51,511872,1.57,2,-2,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 519870
  custom_metrics: {}
  date: 2021-09-16_12-32-15
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.62
  episode_reward_min: -2.0
  episodes_this_iter: 7
  episodes_total: 520
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.31117315202631
          entropy_coeff: 0.01
          kl: 0.01641306806161008
          policy_loss: -0.05402066140605878
          total_loss: -0.035695375994809216
          vf_explained_var: 0.07651188224554062
          vf_loss: 0.03897505545317726
    num_agent_steps_sampled: 519870
    num_agent_steps_trained: 519870
    num_steps_sampled: 519870
    num_steps_trained: 519870
  iterations_since_restore: 65
  node_ip: 192.168.1.96
  num_healthy_workers: 3
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,65,4519.42,519870,1.62,2,-2,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 527868
  custom_metrics: {}
  date: 2021-09-16_12-33-33
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.68
  episode_reward_min: -2.0
  episodes_this_iter: 8
  episodes_total: 528
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4035957923499485
          entropy_coeff: 0.01
          kl: 0.015579389551546296
          policy_loss: -0.04530260538762455
          total_loss: -0.034916875339163246
          vf_explained_var: -0.09261863678693771
          vf_loss: 0.032084779109885934
    num_agent_steps_sampled: 527868
    num_agent_steps_trained: 527868
    num_steps_sampled: 527868
    num_steps_trained: 527868
  iterations_since_restore: 66
  node_ip: 192.168.1.96
  num_healthy_workers

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,66,4596.48,527868,1.68,4,-2,995.63


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 535866
  custom_metrics: {}
  date: 2021-09-16_12-34-48
  done: false
  episode_len_mean: 995.63
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.64
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 537
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.306110998122923
          entropy_coeff: 0.01
          kl: 0.01627420611698573
          policy_loss: -0.04129185476590709
          total_loss: -0.02735153546216347
          vf_explained_var: -0.014066220261156559
          vf_loss: 0.034560298568366336
    num_agent_steps_sampled: 535866
    num_agent_steps_trained: 535866
    num_steps_sampled: 535866
    num_steps_trained: 535866
  iterations_since_restore: 67
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,67,4672.14,535866,1.64,4,-2,995.63




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 543864
  custom_metrics: {}
  date: 2021-09-16_12-36-25
  done: false
  episode_len_mean: 993.47
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.61
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 546
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2815889543102634
          entropy_coeff: 0.01
          kl: 0.016683876023871436
          policy_loss: -0.041727524000390245
          total_loss: -0.011318292878868598
          vf_explained_var: -0.06511492282152176
          vf_loss: 0.05072253915958197
    num_agent_steps_sampled: 543864
    num_agent_steps_trained: 543864
    num_steps_sampled: 543864
    num_steps_trained: 543864
  iterations_since_restore: 68
  node_ip: 192.168.1.96
  num_healthy_workers

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,68,4768.72,543864,1.61,4,-2,993.47


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 551862
  custom_metrics: {}
  date: 2021-09-16_12-37-40
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.67
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 552
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.293670138364197
          entropy_coeff: 0.01
          kl: 0.016214503537539147
          policy_loss: -0.05065026009375209
          total_loss: -0.028070664415836975
          vf_explained_var: 0.021726546809077263
          vf_loss: 0.043084121206575024
    num_agent_steps_sampled: 551862
    num_agent_steps_trained: 551862
    num_steps_sampled: 551862
    num_steps_trained: 551862
  iterations_since_restore: 69
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,69,4844.3,551862,1.67,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 559860
  custom_metrics: {}
  date: 2021-09-16_12-38-55
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.62
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 561
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.31554532781724
          entropy_coeff: 0.01
          kl: 0.016754134380315353
          policy_loss: -0.04662451316397236
          total_loss: -0.033577044081363465
          vf_explained_var: -0.0757332593202591
          vf_loss: 0.03368980137841596
    num_agent_steps_sampled: 559860
    num_agent_steps_trained: 559860
    num_steps_sampled: 559860
    num_steps_trained: 559860
  iterations_since_restore: 70
  node_ip: 192.168.1.96
  num_healthy_workers: 3
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,70,4918.95,559860,1.62,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 567858
  custom_metrics: {}
  date: 2021-09-16_12-40-11
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.55
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 570
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.278489553415647
          entropy_coeff: 0.01
          kl: 0.01857181873451146
          policy_loss: -0.041231416103490175
          total_loss: -0.0037599456131017658
          vf_explained_var: 0.08645354211330414
          vf_loss: 0.057470593051684477
    num_agent_steps_sampled: 567858
    num_agent_steps_trained: 567858
    num_steps_sampled: 567858
    num_steps_trained: 567858
  iterations_since_restore: 71
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,71,4995.06,567858,1.55,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 575856
  custom_metrics: {}
  date: 2021-09-16_12-41-27
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.58
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 576
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.388044209249558
          entropy_coeff: 0.01
          kl: 0.019018071566591778
          policy_loss: -0.04947592248033572
          total_loss: -0.04856390590899654
          vf_explained_var: 0.010240960866212845
          vf_loss: 0.021939747742726466
    num_agent_steps_sampled: 575856
    num_agent_steps_trained: 575856
    num_steps_sampled: 575856
    num_steps_trained: 575856
  iterations_since_restore: 72
  node_ip: 192.168.1.96
  num_healthy_workers: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,72,5070.39,575856,1.58,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 583854
  custom_metrics: {}
  date: 2021-09-16_12-42-47
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.54
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 585
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3345918797677565
          entropy_coeff: 0.01
          kl: 0.01555725421462913
          policy_loss: -0.03627482792587891
          total_loss: -0.014948173335462969
          vf_explained_var: -0.20600496232509613
          vf_loss: 0.04233898513090436
    num_agent_steps_sampled: 583854
    num_agent_steps_trained: 583854
    num_steps_sampled: 583854
    num_steps_trained: 583854
  iterations_since_restore: 73
  node_ip: 192.168.1.96
  num_healthy_workers: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,73,5150.15,583854,1.54,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 591852
  custom_metrics: {}
  date: 2021-09-16_12-44-01
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.48
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 594
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3975882171302714
          entropy_coeff: 0.01
          kl: 0.01757978198438478
          policy_loss: -0.045865760879811424
          total_loss: -0.04541776729046657
          vf_explained_var: -0.2776126563549042
          vf_loss: 0.02178690730445763
    num_agent_steps_sampled: 591852
    num_agent_steps_trained: 591852
    num_steps_sampled: 591852
    num_steps_trained: 591852
  iterations_since_restore: 74
  node_ip: 192.168.1.96
  num_healthy_workers: 3


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,74,5225.06,591852,1.48,4,-2,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 599850
  custom_metrics: {}
  date: 2021-09-16_12-45-14
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.54
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 600
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.263950816790263
          entropy_coeff: 0.01
          kl: 0.017540802025044842
          policy_loss: -0.041925129147186393
          total_loss: -0.029405374651754735
          vf_explained_var: -0.11528264731168747
          vf_loss: 0.03252814282054177
    num_agent_steps_sampled: 599850
    num_agent_steps_trained: 599850
    num_steps_sampled: 599850
    num_steps_trained: 599850
  iterations_since_restore: 75
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,75,5297.74,599850,1.54,4,-1,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 607848
  custom_metrics: {}
  date: 2021-09-16_12-46-31
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.54
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 609
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2289034885744896
          entropy_coeff: 0.01
          kl: 0.018680712937771442
          policy_loss: -0.04317213136812932
          total_loss: -0.018747635584597487
          vf_explained_var: 0.0183955617249012
          vf_loss: 0.04391142286989908
    num_agent_steps_sampled: 607848
    num_agent_steps_trained: 607848
    num_steps_sampled: 607848
    num_steps_trained: 607848
  iterations_since_restore: 76
  node_ip: 192.168.1.96
  num_healthy_workers: 3


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,76,5374.82,607848,1.54,4,-1,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 615846
  custom_metrics: {}
  date: 2021-09-16_12-47-49
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.5
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 618
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2219432873110616
          entropy_coeff: 0.01
          kl: 0.017981485972689244
          policy_loss: -0.044762362285407
          total_loss: -0.008062835891921354
          vf_explained_var: 0.04168079420924187
          vf_loss: 0.05622173513176911
    num_agent_steps_sampled: 615846
    num_agent_steps_trained: 615846
    num_steps_sampled: 615846
    num_steps_trained: 615846
  iterations_since_restore: 77
  node_ip: 192.168.1.96
  num_healthy_workers: 3
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,77,5452.07,615846,1.5,4,-1,996.1


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 623844
  custom_metrics: {}
  date: 2021-09-16_12-49-09
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.41
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 624
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.328553036464158
          entropy_coeff: 0.01
          kl: 0.019976390425434466
          policy_loss: -0.05591437352560861
          total_loss: -0.051167637960965275
          vf_explained_var: -0.04735914617776871
          vf_loss: 0.025035805979108948
    num_agent_steps_sampled: 623844
    num_agent_steps_trained: 623844
    num_steps_sampled: 623844
    num_steps_trained: 623844
  iterations_since_restore: 78
  node_ip: 192.168.1.96
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,78,5532.43,623844,1.41,2,-1,996.1




Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 631842
  custom_metrics: {}
  date: 2021-09-16_12-50-53
  done: false
  episode_len_mean: 992.29
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.45
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 633
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.278118989031802
          entropy_coeff: 0.01
          kl: 0.01930915653621572
          policy_loss: -0.053118612011393875
          total_loss: -0.0545981671230527
          vf_explained_var: -0.09623102843761444
          vf_loss: 0.018405259733735482
    num_agent_steps_sampled: 631842
    num_agent_steps_trained: 631842
    num_steps_sampled: 631842
    num_steps_trained: 631842
  iterations_since_restore: 79
  node_ip: 192.168.1.96
  num_healthy_workers: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,79,5636.18,631842,1.45,2,-1,992.29


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 639840
  custom_metrics: {}
  date: 2021-09-16_12-52-15
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.46
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 642
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3093689540381073
          entropy_coeff: 0.01
          kl: 0.01739836994555741
          policy_loss: -0.041263773735182016
          total_loss: -0.03878351455524323
          vf_explained_var: -0.005081591196358204
          vf_loss: 0.022964194063059686
    num_agent_steps_sampled: 639840
    num_agent_steps_trained: 639840
    num_steps_sampled: 639840
    num_steps_trained: 639840
  iterations_since_restore: 80
  node_ip: 192.168.1.96
  num_healthy_workers

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,80,5718.3,639840,1.46,2,-1,994.85


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 647838
  custom_metrics: {}
  date: 2021-09-16_12-53-41
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.42
  episode_reward_min: -1.0
  episodes_this_iter: 6
  episodes_total: 648
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.394855607709577
          entropy_coeff: 0.01
          kl: 0.017199367281859595
          policy_loss: -0.041253210868566266
          total_loss: -0.048533000033949655
          vf_explained_var: -0.1285753697156906
          vf_loss: 0.014088859463827834
    num_agent_steps_sampled: 647838
    num_agent_steps_trained: 647838
    num_steps_sampled: 647838
    num_steps_trained: 647838
  iterations_since_restore: 81
  node_ip: 192.168.1.96
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,81,5804.7,647838,1.42,2,-1,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 655836
  custom_metrics: {}
  date: 2021-09-16_12-55-02
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.38
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 657
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.0000000000000016e-05
          entropy: 2.343746301435655
          entropy_coeff: 0.01
          kl: 0.020624471214702254
          policy_loss: -0.055678876775807594
          total_loss: -0.04447417800785393
          vf_explained_var: 0.06630068272352219
          vf_loss: 0.0315484900678125
    num_agent_steps_sampled: 655836
    num_agent_steps_trained: 655836
    num_steps_sampled: 655836
    num_steps_trained: 655836
  iterations_since_restore: 82
  node_ip: 192.168.1.96
  num_healthy_workers: 3


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,82,5885.23,655836,1.38,2,-2,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 663834
  custom_metrics: {}
  date: 2021-09-16_12-56-18
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.34
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 666
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3456068351704586
          entropy_coeff: 0.01
          kl: 0.016780760691118713
          policy_loss: -0.05063666874763145
          total_loss: -0.0455327956236258
          vf_explained_var: -0.0216665081679821
          vf_loss: 0.024784270639316007
    num_agent_steps_sampled: 663834
    num_agent_steps_trained: 663834
    num_steps_sampled: 663834
    num_steps_trained: 663834
  iterations_since_restore: 83
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,83,5960.83,663834,1.34,2,-2,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 671832
  custom_metrics: {}
  date: 2021-09-16_12-57-34
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.41
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 672
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4194439667527394
          entropy_coeff: 0.01
          kl: 0.015528270042876994
          policy_loss: -0.041746577913422254
          total_loss: -0.03560349635118919
          vf_explained_var: -0.20198941230773926
          vf_loss: 0.02684366079965814
    num_agent_steps_sampled: 671832
    num_agent_steps_trained: 671832
    num_steps_sampled: 671832
    num_steps_trained: 671832
  iterations_since_restore: 84
  node_ip: 192.168.1.96
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,84,6037.37,671832,1.41,4,-2,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 679830
  custom_metrics: {}
  date: 2021-09-16_12-58-51
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.38
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 681
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3099025872445877
          entropy_coeff: 0.01
          kl: 0.015479124373707272
          policy_loss: -0.04808092906630488
          total_loss: -0.04497974758907672
          vf_explained_var: 0.023173486813902855
          vf_loss: 0.0227174040930432
    num_agent_steps_sampled: 679830
    num_agent_steps_trained: 679830
    num_steps_sampled: 679830
    num_steps_trained: 679830
  iterations_since_restore: 85
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,85,6114.02,679830,1.38,4,-2,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 687828
  custom_metrics: {}
  date: 2021-09-16_13-00-10
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.44
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 690
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3273062923903107
          entropy_coeff: 0.01
          kl: 0.016470402905133446
          policy_loss: -0.04300591170597541
          total_loss: -0.03127206518835518
          vf_explained_var: -0.08418731391429901
          vf_loss: 0.0313010694485681
    num_agent_steps_sampled: 687828
    num_agent_steps_trained: 687828
    num_steps_sampled: 687828
    num_steps_trained: 687828
  iterations_since_restore: 86
  node_ip: 192.168.1.96
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,86,6193.36,687828,1.44,4,-2,996.19


Result for PPO_my_env_8f2a6_00000:
  agent_timesteps_total: 695826
  custom_metrics: {}
  date: 2021-09-16_13-01-26
  done: false
  episode_len_mean: 996.19
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 1.42
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 696
  experiment_id: 43f4be814be443d08f2f6e093d3d4a7f
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.0000000000000016e-05
          entropy: 2.37692408023342
          entropy_coeff: 0.01
          kl: 0.015367304387735058
          policy_loss: -0.04452828146487234
          total_loss: -0.04383521518239411
          vf_explained_var: -0.2191319763660431
          vf_loss: 0.021004663283641884
    num_agent_steps_sampled: 695826
    num_agent_steps_trained: 695826
    num_steps_sampled: 695826
    num_steps_trained: 695826
  iterations_since_restore: 87
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_8f2a6_00000,RUNNING,192.168.1.96:16561,87,6269.01,695826,1.42,4,-2,996.19
