In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is six 2x2 convolutions with stride 2 and no padding, each
    followed by an ELU, then a Flatten. Channel widths go
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512. Every convolution halves the
    spatial size, so a 64x64 input is reduced to 1x1 and the flattened output
    has 512 features per sample.
    """

    def __init__(self):
        super().__init__()

        # Channel count at the input of the stack and after each convolution.
        widths = (3, 32, 32, 64, 128, 256, 512)

        # Conv/ELU pairs are appended in order so that the Sequential indices
        # (conv layers at 0, 2, 4, ...) and therefore the state_dict keys stay
        # exactly as they were.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the convolutional stack and return the flattened result."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib torch model: VisualEncoder -> three-layer MLP -> action and value heads.

    The encoder is initialised from a saved state dict and is NOT frozen:
    ``forward`` runs it with gradients enabled. ``forward`` returns the output
    of the action head and caches the value-head output, which
    ``value_function`` hands back afterwards.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the network and load the pretrained encoder weights.

        Args:
            obs_space, action_space, num_outputs, model_config, name: forwarded
                unchanged to ``TorchModelV2.__init__``. ``action_space`` must
                expose ``.n`` (it sizes the action head).
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512  # width of the first MLP layer's input (the encoder output)
        self.encoder = VisualEncoder()
        # Load on CPU first; the modules are moved to the GPU below if one is available.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.mlp = nn.Sequential(
            nn.Linear(features_dim, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
        )
        self.action_head = nn.Linear(256, action_space.n)
        self.value_head = nn.Linear(256, 1)
        # Value-head output of the most recent forward(); None until forward() has run.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.mlp.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action-head outputs and cache the value estimate.

        Args:
            input_dict: mapping whose ``'obs'`` entry is a 4-D tensor indexed
                (batch, height, width, channels); it is permuted to
                channels-first and divided by 255.0 (presumably 0-255 pixel
                values -- confirm against the environment).
            state: returned unchanged.
            seq_lens: unused.

        Returns:
            Tuple of (action head output, ``state``).
        """
        obs = input_dict['obs']
        # Tensor.cuda()/.to() return a new tensor instead of moving in place, so
        # the result has to be assigned. Follow the device the parameters are
        # actually on rather than assuming CUDA; this is a no-op when the
        # observation is already there.
        obs = obs.to(self.value_head.weight.device)
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        # Deliberately not wrapped in torch.no_grad(): the encoder is fine-tuned.
        features = self.encoder(obs)
        features = self.mlp(features)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the key "my_torch_model"; the trainer config
# selects it through its "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Restrict CUDA for this process to device index 0, with devices ordered by PCI bus ID.
# NOTE(review): this only takes effect if set before CUDA is first initialised in the process -- confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that replaces a reward of exactly zero with a fixed penalty.

    Non-zero rewards are passed through unchanged.

    Args:
        env: the environment to wrap.
        step_penalty: value returned in place of a zero reward. Defaults to
            -0.01, the value that used to be hard-coded.
    """

    def __init__(self, env, step_penalty=-0.01):
        super().__init__(env)
        self.step_penalty = step_penalty

    def reward(self, rew):
        """Return ``step_penalty`` when ``rew == 0``, otherwise ``rew`` itself."""
        return self.step_penalty if rew == 0 else rew
    
def env_creator(env_config):
    """Create the wrapped 'IGLUSilentBuilder-v0' environment.

    The base environment is made with ``max_steps=1000`` and its task set is
    replaced by the ``'C32'`` preset. It is then wrapped, innermost first, by
    PovOnlyWrapper, SelectAndPlace, Discretization (using the
    ``'human-level'`` flat action space) and RewardWrapper.

    Args:
        env_config: accepted for the env-creator signature; not read.

    Returns:
        The fully wrapped environment.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C32']))

    pov_env = PovOnlyWrapper(base_env)
    placing_env = SelectAndPlace(pov_env)
    discrete_env = Discretization(placing_env, flat_action_space('human-level'))
    return RewardWrapper(discrete_env)

from ray.tune.registry import register_env
# Make env_creator available under the name "my_env", which the trainer
# config refers to through its "env" entry.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Ray Tune on the registered "my_env" environment
# using the registered "my_torch_model" network, with WandbLogger attached.
# NOTE(review): no `stop` criterion is passed, so presumably the trial runs
# until it is interrupted and `checkpoint_at_end` only fires on a clean stop -- confirm.
analysis = tune.run(PPOTrainer, 
         config={
             # Name given to register_env above.
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             # The result log below reports num_steps_sampled growing by 1000 per iteration.
             "train_batch_size": 1000,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings read by WandbLogger: W&B project and run name.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name":  "PPO C32 pretrained (AngelaCNN + MLP) (3 noops after placement) r: -0.01"
                  }
              }

        },
        loggers=[WandbLogger],
        checkpoint_at_end=True)

2021-10-24 16:30:32,741	INFO wandb.py:170 -- Already logged into W&B.
2021-10-24 16:30:32,787	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_b550d_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=149717)[0m 2021-10-24 16:30:36,726	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=149717)[0m 2021-10-24 16:30:36,727	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-24_16-32-05
  done: false
  episode_len_mean: 389.0
  episode_media: {}
  episode_reward_max: -3.789999999999963
  episode_reward_mean: -9.17999999999996
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8745014508565268
          entropy_coeff: 0.009999999999999998
          kl: 0.006464067955185766
          policy_loss: 0.04833170341120826
          total_loss: 0.32310654123624166
          vf_explained_var: -0.14939051866531372
          vf_loss: 0.3022270370481743
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1,82.3996,1000,-9.18,-3.79,-14.57,389


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-24_16-32-37
  done: false
  episode_len_mean: 395.4
  episode_media: {}
  episode_reward_max: -3.789999999999963
  episode_reward_mean: -6.917999999999961
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 5
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8670531272888184
          entropy_coeff: 0.009999999999999998
          kl: 0.006209202833419816
          policy_loss: -0.0887492399248812
          total_loss: 0.2439622550167971
          vf_explained_var: 0.4182874858379364
          vf_loss: 0.36014018290572697
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,2,114.311,2000,-6.918,-3.79,-14.57,395.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-24_16-33-09
  done: false
  episode_len_mean: 394.85714285714283
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -6.062857142857103
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 7
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8657160917917888
          entropy_coeff: 0.009999999999999998
          kl: 0.003779723745202447
          policy_loss: -0.1740405605898963
          total_loss: -0.15635093591279453
          vf_explained_var: -0.013682860881090164
          vf_loss: 0.04559083989717894
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,3,146.431,3000,-6.06286,-3.77,-14.57,394.857


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-24_16-33-39
  done: false
  episode_len_mean: 398.0
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -6.583999999999959
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 10
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.849213006761339
          entropy_coeff: 0.009999999999999998
          kl: 0.010146290036660883
          policy_loss: 0.051668213307857515
          total_loss: 0.2647208109498024
          vf_explained_var: 0.5879873633384705
          vf_loss: 0.2405300976915492
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,4,176.14,4000,-6.584,-3.77,-14.57,398


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-24_16-34-09
  done: false
  episode_len_mean: 395.8333333333333
  episode_media: {}
  episode_reward_max: -3.759999999999964
  episode_reward_mean: -6.128333333333292
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.861060121324327
          entropy_coeff: 0.009999999999999998
          kl: 0.008678554752408049
          policy_loss: -0.02731143823928303
          total_loss: -0.045309972514708835
          vf_explained_var: -0.25567740201950073
          vf_loss: 0.009744210629206565
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,5,206.035,5000,-6.12833,-3.76,-14.57,395.833


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-24_16-34-39
  done: false
  episode_len_mean: 392.4
  episode_media: {}
  episode_reward_max: -3.759999999999964
  episode_reward_mean: -5.65999999999996
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 15
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.851436882548862
          entropy_coeff: 0.009999999999999998
          kl: 0.006646416449191762
          policy_loss: 0.05685025875767072
          total_loss: 0.03683355405098862
          vf_explained_var: 0.03043030947446823
          vf_loss: 0.00783302330593061
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,6,236.455,6000,-5.66,-3.76,-14.57,392.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-24_16-35-10
  done: false
  episode_len_mean: 390.8235294117647
  episode_media: {}
  episode_reward_max: -3.649999999999966
  episode_reward_mean: -5.439999999999961
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 17
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.8260545995500355
          entropy_coeff: 0.009999999999999998
          kl: 0.011128397038445372
          policy_loss: -0.09059783932235506
          total_loss: -0.10953800967997974
          vf_explained_var: 0.3399302661418915
          vf_loss: 0.008207533965145963
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,7,267.347,7000,-5.44,-3.65,-14.57,390.824


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-24_16-35-44
  done: false
  episode_len_mean: 386.95
  episode_media: {}
  episode_reward_max: -3.47999999999997
  episode_reward_mean: -5.171499999999962
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 20
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.8374527984195286
          entropy_coeff: 0.009999999999999998
          kl: 0.011243179280371596
          policy_loss: -0.06996542380915748
          total_loss: -0.0870641902089119
          vf_explained_var: 0.4547705054283142
          vf_loss: 0.010151443542498681
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,8,301.461,8000,-5.1715,-3.48,-14.57,386.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-24_16-36-18
  done: false
  episode_len_mean: 381.5652173913044
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.947826086956485
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 23
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.809396611319648
          entropy_coeff: 0.009999999999999998
          kl: 0.009071953713791981
          policy_loss: -0.049685575813055036
          total_loss: -0.06796610446439849
          vf_explained_var: 0.5552231669425964
          vf_loss: 0.008906242189308007
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,9,335.282,9000,-4.94783,-3.32,-14.57,381.565


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-24_16-36-46
  done: false
  episode_len_mean: 380.8076923076923
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.8096153846153475
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 26
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.7855083571539985
          entropy_coeff: 0.009999999999999998
          kl: 0.011010073729681944
          policy_loss: -0.027982488233182164
          total_loss: -0.04590360406372282
          vf_explained_var: 0.22871369123458862
          vf_loss: 0.008832960001503428
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,10,363.493,10000,-4.80962,-3.32,-14.57,380.808


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-24_16-37-16
  done: false
  episode_len_mean: 381.0357142857143
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.740357142857106
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.7117525339126587
          entropy_coeff: 0.009999999999999998
          kl: 0.011356128641852302
          policy_loss: -0.10955192546049754
          total_loss: -0.1251619428396225
          vf_explained_var: 0.16841131448745728
          vf_loss: 0.010371892288741138
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,11,393.154,11000,-4.74036,-3.32,-14.57,381.036




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-24_16-38-08
  done: false
  episode_len_mean: 381.06451612903226
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.650645161290286
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 31
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.714864330821567
          entropy_coeff: 0.009999999999999998
          kl: 0.010023129917394664
          policy_loss: -0.006735204822487301
          total_loss: -0.026600725534889434
          vf_explained_var: 0.47329843044281006
          vf_loss: 0.006280810648523685
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,12,445.215,12000,-4.65065,-3.32,-14.57,381.065


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-24_16-38-42
  done: false
  episode_len_mean: 381.3529411764706
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.579411764705845
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 34
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.728043961524963
          entropy_coeff: 0.009999999999999998
          kl: 0.008651623382560923
          policy_loss: 0.012921981430715984
          total_loss: -0.0076889956163035495
          vf_explained_var: 0.5717296600341797
          vf_loss: 0.0058043008288627285
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,13,479.433,13000,-4.57941,-3.32,-14.57,381.353


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-24_16-39-17
  done: false
  episode_len_mean: 382.1666666666667
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.544999999999963
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 36
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6479697518878513
          entropy_coeff: 0.009999999999999998
          kl: 0.013948529684916682
          policy_loss: -0.055645017077525456
          total_loss: -0.07646119793256125
          vf_explained_var: 0.7898272275924683
          vf_loss: 0.004268661454423435
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,14,514.563,14000,-4.545,-3.32,-14.57,382.167


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-24_16-39-51
  done: false
  episode_len_mean: 383.02564102564105
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.49794871794868
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 39
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6317559374703303
          entropy_coeff: 0.009999999999999998
          kl: 0.010209106379050622
          policy_loss: 0.030089853703975676
          total_loss: 0.011907994002103805
          vf_explained_var: 0.6926839351654053
          vf_loss: 0.00711478917962975
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 1500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,15,547.867,15000,-4.49795,-3.32,-14.57,383.026


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-24_16-40-25
  done: false
  episode_len_mean: 383.1951219512195
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.46707317073167
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 41
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6589794397354125
          entropy_coeff: 0.009999999999999998
          kl: 0.01179927709919902
          policy_loss: 0.015539968758821488
          total_loss: -0.005534018907282088
          vf_explained_var: 0.9077219367027283
          vf_loss: 0.004335877529552413
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,16,581.917,16000,-4.46707,-3.32,-14.57,383.195


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-24_16-40-59
  done: false
  episode_len_mean: 383.8636363636364
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.430454545454507
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 44
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.642636757426792
          entropy_coeff: 0.009999999999999998
          kl: 0.012138599385069594
          policy_loss: -0.05333471776296695
          total_loss: -0.07071768748056557
          vf_explained_var: 0.8029434084892273
          vf_loss: 0.007829534760417624
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 1700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,17,616.268,17000,-4.43045,-3.32,-14.57,383.864


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-24_16-41-32
  done: false
  episode_len_mean: 383.19565217391306
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.398043478260831
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 46
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6598140822516547
          entropy_coeff: 0.009999999999999998
          kl: 0.01314943410225845
          policy_loss: -0.12129837075869242
          total_loss: -0.13797996093829473
          vf_explained_var: 0.6058257818222046
          vf_loss: 0.008601609620058702
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,18,648.663,18000,-4.39804,-3.32,-14.57,383.196


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-24_16-42-06
  done: false
  episode_len_mean: 383.40816326530614
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.365510204081595
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 49
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.642875928348965
          entropy_coeff: 0.009999999999999998
          kl: 0.01491966283023416
          policy_loss: 0.0024230032331413695
          total_loss: -0.01852509238653713
          vf_explained_var: 0.5170446038246155
          vf_loss: 0.0039886941124374665
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,19,682.51,19000,-4.36551,-3.32,-14.57,383.408


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-24_16-42-40
  done: false
  episode_len_mean: 383.5769230769231
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.3365384615384235
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 52
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.658107781410217
          entropy_coeff: 0.009999999999999998
          kl: 0.012642321298802184
          policy_loss: -0.026712751471334034
          total_loss: -0.04614874612953928
          vf_explained_var: 0.7392212152481079
          vf_loss: 0.0058808495523408055
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,20,716.728,20000,-4.33654,-3.32,-14.57,383.577


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-24_16-43-13
  done: false
  episode_len_mean: 384.5925925925926
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.328148148148109
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 54
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.605208420753479
          entropy_coeff: 0.009999999999999998
          kl: 0.02098074258200688
          policy_loss: 0.004875899354616801
          total_loss: -0.01574111721581883
          vf_explained_var: 0.9563828110694885
          vf_loss: 0.0033369949087500572
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 2100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,21,750.117,21000,-4.32815,-3.32,-14.57,384.593


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-24_16-43-47
  done: false
  episode_len_mean: 385.0175438596491
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.307017543859612
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 57
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5804673274358114
          entropy_coeff: 0.009999999999999998
          kl: 0.011689357294014959
          policy_loss: -0.028939448379808
          total_loss: -0.04740100999673207
          vf_explained_var: 0.5149821043014526
          vf_loss: 0.005589706391199595
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,22,784.247,22000,-4.30702,-3.32,-14.57,385.018


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-24_16-44-20
  done: false
  episode_len_mean: 386.52542372881356
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.306610169491487
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 59
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5306269778145682
          entropy_coeff: 0.009999999999999998
          kl: 0.021977847730934603
          policy_loss: -0.06878322313229243
          total_loss: -0.08776106186625031
          vf_explained_var: 0.9014201760292053
          vf_loss: 0.003031755179270274
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,23,817.229,23000,-4.30661,-3.32,-14.57,386.525




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-24_16-45-12
  done: false
  episode_len_mean: 386.78688524590166
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.294754098360617
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 61
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.513058098157247
          entropy_coeff: 0.009999999999999998
          kl: 0.01877348659076528
          policy_loss: -0.13999061369233662
          total_loss: -0.15821494493219587
          vf_explained_var: 0.9409878849983215
          vf_loss: 0.0026822165185068215
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 2400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,24,868.589,24000,-4.29475,-3.32,-14.57,386.787


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-24_16-45-45
  done: false
  episode_len_mean: 388.234375
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.289218749999961
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 64
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4475724432203503
          entropy_coeff: 0.009999999999999998
          kl: 0.016079317312989724
          policy_loss: 0.014534425818257861
          total_loss: -0.00395162014497651
          vf_explained_var: 0.9434055089950562
          vf_loss: 0.0023718344843170294
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,25,901.911,25000,-4.28922,-3.32,-14.57,388.234


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-24_16-46-17
  done: false
  episode_len_mean: 389.04545454545456
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.28499999999996
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.440363878673977
          entropy_coeff: 0.009999999999999998
          kl: 0.014243272084205533
          policy_loss: -0.011007374069756931
          total_loss: -0.029072017305427127
          vf_explained_var: 0.8454114198684692
          vf_loss: 0.003134256101394486
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,26,934.19,26000,-4.285,-3.32,-14.57,389.045


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-24_16-46-47
  done: false
  episode_len_mean: 390.30882352941177
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.286029411764666
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 68
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3804907480875652
          entropy_coeff: 0.009999999999999998
          kl: 0.015228392343573462
          policy_loss: -0.24414039916462368
          total_loss: -0.2604177412059572
          vf_explained_var: 0.8665573000907898
          vf_loss: 0.00410117306229141
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,27,964.163,27000,-4.28603,-3.32,-14.57,390.309


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-24_16-47-18
  done: false
  episode_len_mean: 391.49295774647885
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.28169014084503
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 71
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.452723987897237
          entropy_coeff: 0.009999999999999998
          kl: 0.020798418963762878
          policy_loss: 0.07467944166726537
          total_loss: 0.06350567481584019
          vf_explained_var: 0.734386682510376
          vf_loss: 0.008673829646108465
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,28,994.567,28000,-4.28169,-3.23,-14.57,391.493


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-24_16-47-48
  done: false
  episode_len_mean: 393.0821917808219
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.287534246575302
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3688306464089286
          entropy_coeff: 0.009999999999999998
          kl: 0.012043590189089246
          policy_loss: -0.15328418264786403
          total_loss: -0.16789198997947904
          vf_explained_var: 0.8074527382850647
          vf_loss: 0.005015787289529625
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 2900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,29,1024.83,29000,-4.28753,-3.23,-14.57,393.082


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-24_16-48-18
  done: false
  episode_len_mean: 393.5394736842105
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.278026315789433
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 76
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.342338877254062
          entropy_coeff: 0.009999999999999998
          kl: 0.011303990309079331
          policy_loss: 0.060760860476228924
          total_loss: 0.05192676120334201
          vf_explained_var: 0.4193750023841858
          vf_loss: 0.010774191976007487
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,30,1054.94,30000,-4.27803,-3.23,-14.57,393.539


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-24_16-48-46
  done: false
  episode_len_mean: 394.87179487179486
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.282564102564062
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 78
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.343442251947191
          entropy_coeff: 0.009999999999999998
          kl: 0.016221147535997037
          policy_loss: 0.10132784942785898
          total_loss: 0.08869644237889183
          vf_explained_var: 0.4140292704105377
          vf_loss: 0.005328380153191069
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,31,1082.46,31000,-4.28256,-3.23,-14.57,394.872


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-24_16-49-16
  done: false
  episode_len_mean: 395.575
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.281249999999959
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2792996803919476
          entropy_coeff: 0.009999999999999998
          kl: 0.011268730925576767
          policy_loss: -0.13213372627894085
          total_loss: -0.1421778076224857
          vf_explained_var: 0.2988516092300415
          vf_loss: 0.008945719809788796
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,32,1112.16,32000,-4.28125,-3.23,-14.57,395.575


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-24_16-49-47
  done: false
  episode_len_mean: 396.7710843373494
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.28144578313249
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 83
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2264593124389647
          entropy_coeff: 0.009999999999999998
          kl: 0.009562286938265045
          policy_loss: 0.026317649748590258
          total_loss: 0.015411939720312755
          vf_explained_var: 0.2976221740245819
          vf_loss: 0.00813161081298151
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,33,1143.21,33000,-4.28145,-3.23,-14.57,396.771


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-24_16-50-17
  done: false
  episode_len_mean: 397.5529411764706
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.281882352941135
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 85
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.154581750763787
          entropy_coeff: 0.009999999999999998
          kl: 0.010238133431608709
          policy_loss: 0.07149455828799142
          total_loss: 0.05855126447147793
          vf_explained_var: 0.5043545365333557
          vf_loss: 0.005147155107180071
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,34,1173.8,34000,-4.28188,-3.23,-14.57,397.553


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-24_16-50-49
  done: false
  episode_len_mean: 397.6363636363636
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.272272727272687
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 88
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.144054741329617
          entropy_coeff: 0.009999999999999998
          kl: 0.008682500401496778
          policy_loss: -0.013468047810925378
          total_loss: -0.02171232286426756
          vf_explained_var: 0.41864296793937683
          vf_loss: 0.010265925884272695
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,35,1205.09,35000,-4.27227,-3.23,-14.57,397.636




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-24_16-51-36
  done: false
  episode_len_mean: 397.74444444444447
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.266777777777737
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 90
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1403938982221815
          entropy_coeff: 0.009999999999999998
          kl: 0.00824945145423022
          policy_loss: 0.0837372480167283
          total_loss: 0.07088299691677094
          vf_explained_var: 0.517692506313324
          vf_loss: 0.005765495345177543
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,36,1252.19,36000,-4.26678,-3.23,-14.57,397.744


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-24_16-52-09
  done: false
  episode_len_mean: 398.5
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.268043478260829
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 92
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.182839783032735
          entropy_coeff: 0.009999999999999998
          kl: 0.009828442026888205
          policy_loss: -0.08935154469476805
          total_loss: -0.09634461932712131
          vf_explained_var: 0.0700676217675209
          vf_loss: 0.011518225839568508
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,37,1285.68,37000,-4.26804,-3.23,-14.57,398.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-24_16-52-40
  done: false
  episode_len_mean: 399.7157894736842
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.271263157894695
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 3
  episodes_total: 95
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.179963665538364
          entropy_coeff: 0.009999999999999998
          kl: 0.01071076478026135
          policy_loss: -0.003799470596843296
          total_loss: -0.013246586587693955
          vf_explained_var: 0.0646025761961937
          vf_loss: 0.008737635881536537
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 3800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,38,1316.11,38000,-4.27126,-3.23,-14.57,399.716


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-24_16-53-11
  done: false
  episode_len_mean: 399.54639175257734
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.263917525773154
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 97
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.208504091368781
          entropy_coeff: 0.009999999999999998
          kl: 0.011886712216975858
          policy_loss: -0.0674435771173901
          total_loss: -0.07422008299165302
          vf_explained_var: 0.35343480110168457
          vf_loss: 0.011296769119023034
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 3900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,39,1347.77,39000,-4.26392,-3.23,-14.57,399.546


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-24_16-53-41
  done: false
  episode_len_mean: 399.72727272727275
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.260303030302989
  episode_reward_min: -14.569999999999958
  episodes_this_iter: 2
  episodes_total: 99
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.203821478949653
          entropy_coeff: 0.009999999999999998
          kl: 0.010215233181793269
          policy_loss: -0.10145912915468216
          total_loss: -0.10908866872390112
          vf_explained_var: 0.1000121459364891
          vf_loss: 0.01096103334033008
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,40,1377.27,40000,-4.2603,-3.23,-14.57,399.727


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-24_16-54-14
  done: false
  episode_len_mean: 400.84
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.162999999999958
  episode_reward_min: -9.879999999999947
  episodes_this_iter: 3
  episodes_total: 102
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1771748357348972
          entropy_coeff: 0.009999999999999998
          kl: 0.006780963908577893
          policy_loss: 0.013348874863651064
          total_loss: 0.0003829019765059153
          vf_explained_var: 0.6243898272514343
          vf_loss: 0.006517200451344252
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,41,1410.25,41000,-4.163,-3.23,-9.88,400.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-24_16-54-46
  done: false
  episode_len_mean: 401.77
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.148099999999958
  episode_reward_min: -9.879999999999947
  episodes_this_iter: 2
  episodes_total: 104
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.174975722365909
          entropy_coeff: 0.009999999999999998
          kl: 0.012298560523674555
          policy_loss: -0.10249131056997511
          total_loss: -0.11347530889842246
          vf_explained_var: 0.6980873942375183
          vf_loss: 0.006614990001576694
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,42,1442,42000,-4.1481,-3.23,-9.88,401.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-24_16-55-17
  done: false
  episode_len_mean: 401.89
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.131299999999959
  episode_reward_min: -9.879999999999947
  episodes_this_iter: 2
  episodes_total: 106
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1102527618408202
          entropy_coeff: 0.009999999999999998
          kl: 0.008925633118210562
          policy_loss: -0.11094785432020823
          total_loss: -0.11995704472064972
          vf_explained_var: 0.6036320924758911
          vf_loss: 0.009080932941287756
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,43,1473.63,43000,-4.1313,-3.23,-9.88,401.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-24_16-55-52
  done: false
  episode_len_mean: 402.0
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.019999999999959
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 109
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2024339993794757
          entropy_coeff: 0.009999999999999998
          kl: 0.010142526724833248
          policy_loss: 0.08538372500075234
          total_loss: 0.07463518761926227
          vf_explained_var: 0.5885829329490662
          vf_loss: 0.007852699193689559
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,44,1507.91,44000,-4.02,-3.23,-5.19,402


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-24_16-56-22
  done: false
  episode_len_mean: 402.52
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.025199999999959
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 111
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.185191528002421
          entropy_coeff: 0.009999999999999998
          kl: 0.012611785929424595
          policy_loss: -0.15517599599228965
          total_loss: -0.1648997873067856
          vf_explained_var: 0.5824286937713623
          vf_loss: 0.007871644761568557
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,45,1538.32,45000,-4.0252,-3.23,-5.19,402.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-24_16-56-54
  done: false
  episode_len_mean: 402.21
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.022099999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 114
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2415825181537206
          entropy_coeff: 0.009999999999999998
          kl: 0.01087047312137766
          policy_loss: -0.037042364809248184
          total_loss: -0.04734663317600886
          vf_explained_var: 0.5038846731185913
          vf_loss: 0.008442775232510434
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,46,1570.39,46000,-4.0221,-3.23,-5.19,402.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-24_16-57-24
  done: false
  episode_len_mean: 402.97
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.029699999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 116
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0705522815386455
          entropy_coeff: 0.009999999999999998
          kl: 0.009868319170686925
          policy_loss: -0.11270238558451334
          total_loss: -0.12090989384386275
          vf_explained_var: 0.3475642800331116
          vf_loss: 0.009167456019268785
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,47,1600.41,47000,-4.0297,-3.23,-5.19,402.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-24_16-57-56
  done: false
  episode_len_mean: 403.65
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.036499999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 119
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2498056411743166
          entropy_coeff: 0.009999999999999998
          kl: 0.009003410833619243
          policy_loss: -0.04453725558188226
          total_loss: -0.054479464971356924
          vf_explained_var: 0.42537450790405273
          vf_loss: 0.009517195110998324
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,48,1632.59,48000,-4.0365,-3.23,-5.19,403.65




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-24_16-58-44
  done: false
  episode_len_mean: 404.39
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.043899999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 122
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1899481534957888
          entropy_coeff: 0.009999999999999998
          kl: 0.012266112477788216
          policy_loss: 0.09350036631027857
          total_loss: 0.08215868108802371
          vf_explained_var: 0.5285348892211914
          vf_loss: 0.006417982457949418
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,49,1680.16,49000,-4.0439,-3.23,-5.19,404.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-24_16-59-17
  done: false
  episode_len_mean: 404.32
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.043199999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2172703398598563
          entropy_coeff: 0.009999999999999998
          kl: 0.006500860365237693
          policy_loss: 0.04687304712004132
          total_loss: 0.03671799815363354
          vf_explained_var: 0.22799678146839142
          vf_loss: 0.009823611605032865
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,50,1712.61,50000,-4.0432,-3.23,-5.19,404.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-24_16-59-47
  done: false
  episode_len_mean: 404.85
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.048499999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 127
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0900234513812594
          entropy_coeff: 0.009999999999999998
          kl: 0.014873252075403897
          policy_loss: 0.0006163266797860463
          total_loss: -0.008427149719662137
          vf_explained_var: 0.3216876983642578
          vf_loss: 0.006837035368921028
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,51,1743.03,51000,-4.0485,-3.23,-5.19,404.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-24_17-00-18
  done: false
  episode_len_mean: 405.23
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.052299999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 130
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.12648290793101
          entropy_coeff: 0.009999999999999998
          kl: 0.011735798203122942
          policy_loss: 0.0848047062754631
          total_loss: 0.07513796869251463
          vf_explained_var: 0.5444703698158264
          vf_loss: 0.007637257182310956
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,52,1773.67,52000,-4.0523,-3.23,-5.19,405.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-24_17-00-48
  done: false
  episode_len_mean: 405.89
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.058899999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 132
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1497144010331897
          entropy_coeff: 0.009999999999999998
          kl: 0.011252605012021159
          policy_loss: 0.015233108070161608
          total_loss: 0.003974810656574037
          vf_explained_var: 0.3166044354438782
          vf_loss: 0.006441092347571006
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,53,1803.49,53000,-4.0589,-3.23,-5.19,405.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-24_17-01-18
  done: false
  episode_len_mean: 405.87
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.058699999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 135
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.123722928100162
          entropy_coeff: 0.009999999999999998
          kl: 0.008997788878114773
          policy_loss: 0.03958675406045384
          total_loss: 0.03314695507287979
          vf_explained_var: 0.24758554995059967
          vf_loss: 0.011760675709228962
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,54,1834.25,54000,-4.0587,-3.23,-5.19,405.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-24_17-01-49
  done: false
  episode_len_mean: 406.07
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.060699999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 137
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.102507860130734
          entropy_coeff: 0.009999999999999998
          kl: 0.012541333498967185
          policy_loss: -0.035352697802914515
          total_loss: -0.042853817840417224
          vf_explained_var: 0.40068408846855164
          vf_loss: 0.009291257428574479
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,55,1864.65,55000,-4.0607,-3.23,-5.19,406.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-24_17-02-19
  done: false
  episode_len_mean: 405.52
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.0551999999999575
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 140
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0978654623031616
          entropy_coeff: 0.009999999999999998
          kl: 0.004808920034010278
          policy_loss: 0.06038766809635692
          total_loss: 0.05275454173485438
          vf_explained_var: 0.2738806903362274
          vf_loss: 0.011722519668465894
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,56,1895.24,56000,-4.0552,-3.23,-5.19,405.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-24_17-02-49
  done: false
  episode_len_mean: 406.78
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.0677999999999574
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 142
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1042529503504435
          entropy_coeff: 0.009999999999999998
          kl: 0.01537725597722293
          policy_loss: 0.13124403125709957
          total_loss: 0.11703440149625143
          vf_explained_var: 0.05786760523915291
          vf_loss: 0.0042379867754385086
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,57,1925.16,57000,-4.0678,-3.23,-5.19,406.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-24_17-03-21
  done: false
  episode_len_mean: 406.19
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.061899999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 145
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0232395437028674
          entropy_coeff: 0.009999999999999998
          kl: 0.00944839549839427
          policy_loss: 0.05225651106900639
          total_loss: 0.04419601460297903
          vf_explained_var: 0.10645236074924469
          vf_loss: 0.010577483438990183
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,58,1957.09,58000,-4.0619,-3.23,-5.19,406.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-24_17-03-52
  done: false
  episode_len_mean: 405.9
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.0589999999999575
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 147
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.062370779779222
          entropy_coeff: 0.009999999999999998
          kl: 0.010921950706014469
          policy_loss: -0.09490619566705492
          total_loss: -0.10280030750566059
          vf_explained_var: 0.05978868156671524
          vf_loss: 0.01088651656528883
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,59,1988.08,59000,-4.059,-3.23,-5.19,405.9




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-24_17-04-43
  done: false
  episode_len_mean: 405.76
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.057599999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 150
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9587291452619764
          entropy_coeff: 0.009999999999999998
          kl: 0.010482137171115763
          policy_loss: -0.08771433474289046
          total_loss: -0.09220622587535117
          vf_explained_var: 0.1551409810781479
          vf_loss: 0.013326540134019321
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,60,2038.89,60000,-4.0576,-3.23,-5.19,405.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-24_17-05-16
  done: false
  episode_len_mean: 405.85
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.058499999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9393729209899901
          entropy_coeff: 0.009999999999999998
          kl: 0.00826451435795603
          policy_loss: 0.02582564883761936
          total_loss: 0.015735702547762128
          vf_explained_var: 0.16919885575771332
          vf_loss: 0.007909146242632737
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,61,2071.5,61000,-4.0585,-3.23,-5.19,405.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-24_17-05-49
  done: false
  episode_len_mean: 405.5
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.054999999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 155
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.950359214676751
          entropy_coeff: 0.009999999999999998
          kl: 0.009969146936975943
          policy_loss: -0.07239154279232025
          total_loss: -0.08211904466152191
          vf_explained_var: 0.2519068121910095
          vf_loss: 0.008093792214640416
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,62,2105.06,62000,-4.055,-3.23,-5.19,405.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-24_17-06-22
  done: false
  episode_len_mean: 404.54
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.045399999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 158
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9164819492234124
          entropy_coeff: 0.009999999999999998
          kl: 0.012945675846327663
          policy_loss: 0.0335510555240843
          total_loss: 0.02590430991517173
          vf_explained_var: 0.15824058651924133
          vf_loss: 0.00933349059907616
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,63,2137.56,63000,-4.0454,-3.23,-5.19,404.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-24_17-06-53
  done: false
  episode_len_mean: 404.87
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.0486999999999576
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 160
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9814184308052063
          entropy_coeff: 0.009999999999999998
          kl: 0.017001334533203287
          policy_loss: -0.11688916385173798
          total_loss: -0.12479049993885888
          vf_explained_var: -0.14685875177383423
          vf_loss: 0.009043870337255714
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,64,2168.26,64000,-4.0487,-3.23,-5.19,404.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-24_17-07-24
  done: false
  episode_len_mean: 404.96
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.049599999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 163
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8552635934617785
          entropy_coeff: 0.009999999999999998
          kl: 0.015769304133691698
          policy_loss: 0.023376041485203638
          total_loss: 0.016569613582558103
          vf_explained_var: 0.336222380399704
          vf_loss: 0.009085138495235394
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,65,2199.97,65000,-4.0496,-3.23,-5.19,404.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-24_17-07-57
  done: false
  episode_len_mean: 404.54
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.045399999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 165
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9193514188130696
          entropy_coeff: 0.009999999999999998
          kl: 0.016776083624512793
          policy_loss: -0.11739676263597276
          total_loss: -0.12348563108179304
          vf_explained_var: -0.014098601415753365
          vf_loss: 0.01027367874695402
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,66,2232.38,66000,-4.0454,-3.23,-5.19,404.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-24_17-08-30
  done: false
  episode_len_mean: 402.76
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.027599999999957
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 168
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.88850161102083
          entropy_coeff: 0.009999999999999998
          kl: 0.013818248365022849
          policy_loss: 0.0158722884953022
          total_loss: 0.00828026251660453
          vf_explained_var: 0.4219649136066437
          vf_loss: 0.008961159612065077
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,67,2265.64,67000,-4.0276,-3.23,-5.19,402.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-24_17-09-03
  done: false
  episode_len_mean: 402.0
  episode_media: {}
  episode_reward_max: -3.2699999999999743
  episode_reward_mean: -4.019999999999959
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 171
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9041373504532708
          entropy_coeff: 0.009999999999999998
          kl: 0.010265477844894495
          policy_loss: 0.04388604544930988
          total_loss: 0.033215171015924874
          vf_explained_var: -0.2577526867389679
          vf_loss: 0.006638199830518311
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,68,2298.08,68000,-4.02,-3.27,-5.19,402


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-24_17-09-35
  done: false
  episode_len_mean: 400.31
  episode_media: {}
  episode_reward_max: -3.2699999999999743
  episode_reward_mean: -4.003099999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 173
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.938566439681583
          entropy_coeff: 0.009999999999999998
          kl: 0.012302553969486003
          policy_loss: -0.08584684895144569
          total_loss: -0.0920879809392823
          vf_explained_var: -0.09523559361696243
          vf_loss: 0.011068474356498983
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,69,2330.86,69000,-4.0031,-3.27,-5.19,400.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-24_17-10-06
  done: false
  episode_len_mean: 400.38
  episode_media: {}
  episode_reward_max: -3.2699999999999743
  episode_reward_mean: -4.003799999999958
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 175
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8377331442303129
          entropy_coeff: 0.009999999999999998
          kl: 0.012617664604869728
          policy_loss: -0.09889679468340344
          total_loss: -0.10145137790176603
          vf_explained_var: -0.31011882424354553
          vf_loss: 0.013693519097028508
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,70,2361.05,70000,-4.0038,-3.27,-5.19,400.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-24_17-10-36
  done: false
  episode_len_mean: 399.42
  episode_media: {}
  episode_reward_max: -3.2699999999999743
  episode_reward_mean: -3.994199999999958
  episode_reward_min: -4.759999999999943
  episodes_this_iter: 3
  episodes_total: 178
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.943901452753279
          entropy_coeff: 0.009999999999999998
          kl: 0.009985887019563804
          policy_loss: -0.10103801563382149
          total_loss: -0.10736364101370176
          vf_explained_var: 0.25731655955314636
          vf_loss: 0.011428273172997352
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,71,2390.96,71000,-3.9942,-3.27,-4.76,399.42




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-24_17-11-24
  done: false
  episode_len_mean: 397.74
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.977399999999959
  episode_reward_min: -4.87999999999994
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8733591145939297
          entropy_coeff: 0.009999999999999998
          kl: 0.03659713302873951
          policy_loss: -0.043128147059016755
          total_loss: -0.045462395747502646
          vf_explained_var: -0.05293325334787369
          vf_loss: 0.010223573586295566
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,72,2438.96,72000,-3.9774,-2.96,-4.88,397.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-24_17-11-52
  done: false
  episode_len_mean: 398.29
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9828999999999586
  episode_reward_min: -5.009999999999938
  episodes_this_iter: 2
  episodes_total: 183
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9381276978386772
          entropy_coeff: 0.009999999999999998
          kl: 0.01544921477802548
          policy_loss: -0.093234374291367
          total_loss: -0.10073454214466943
          vf_explained_var: 0.09560343623161316
          vf_loss: 0.007970528283234064
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,73,2467.82,73000,-3.9829,-2.96,-5.01,398.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-24_17-12-25
  done: false
  episode_len_mean: 397.76
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.977599999999959
  episode_reward_min: -5.099999999999936
  episodes_this_iter: 3
  episodes_total: 186
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8730070352554322
          entropy_coeff: 0.009999999999999998
          kl: 0.008519367822744251
          policy_loss: 0.05555422670311398
          total_loss: 0.04674337307612101
          vf_explained_var: 0.165708526968956
          vf_loss: 0.007762748339316911
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,74,2500.1,74000,-3.9776,-2.96,-5.1,397.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-24_17-12-55
  done: false
  episode_len_mean: 398.51
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.985099999999959
  episode_reward_min: -5.099999999999936
  episodes_this_iter: 2
  episodes_total: 188
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.044386715359158
          entropy_coeff: 0.009999999999999998
          kl: 0.01093842220809203
          policy_loss: 0.17733696268664467
          total_loss: 0.16084573434458838
          vf_explained_var: 0.061534538865089417
          vf_loss: 0.0011838455134744032
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,75,2530.25,75000,-3.9851,-2.96,-5.1,398.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-24_17-13-27
  done: false
  episode_len_mean: 396.6
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9659999999999593
  episode_reward_min: -5.099999999999936
  episodes_this_iter: 3
  episodes_total: 191
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7966269003020392
          entropy_coeff: 0.009999999999999998
          kl: 0.0073595756708575676
          policy_loss: 0.06479120287630293
          total_loss: 0.057850943754116695
          vf_explained_var: 0.051376041024923325
          vf_loss: 0.009163116663694381
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,76,2562.02,76000,-3.966,-2.96,-5.1,396.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-24_17-13-57
  done: false
  episode_len_mean: 396.14
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9613999999999594
  episode_reward_min: -5.099999999999936
  episodes_this_iter: 2
  episodes_total: 193
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9459219800101386
          entropy_coeff: 0.009999999999999998
          kl: 0.011981259289114614
          policy_loss: -0.10871035754680633
          total_loss: -0.1145601620276769
          vf_explained_var: -0.14921227097511292
          vf_loss: 0.010576658864091667
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,77,2592.16,77000,-3.9614,-2.96,-5.1,396.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-24_17-14-26
  done: false
  episode_len_mean: 396.11
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9610999999999588
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 196
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8641368879212274
          entropy_coeff: 0.009999999999999998
          kl: 0.012407456196997095
          policy_loss: 0.01252084324757258
          total_loss: 0.007624820040331946
          vf_explained_var: -0.12456811964511871
          vf_loss: 0.01060470883255928
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,78,2620.89,78000,-3.9611,-2.96,-5.42,396.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-24_17-14-55
  done: false
  episode_len_mean: 396.78
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9677999999999596
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 2
  episodes_total: 198
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9696638610627917
          entropy_coeff: 0.009999999999999998
          kl: 0.01201201509611926
          policy_loss: 0.10767259879244698
          total_loss: 0.09788491361671024
          vf_explained_var: 0.09601005911827087
          vf_loss: 0.006868412456889119
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,79,2650.3,79000,-3.9678,-2.96,-5.42,396.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-24_17-15-26
  done: false
  episode_len_mean: 397.07
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9706999999999595
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 2
  episodes_total: 200
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8377525409062703
          entropy_coeff: 0.009999999999999998
          kl: 0.015588897512443387
          policy_loss: -0.09990538226233589
          total_loss: -0.10407506806982889
          vf_explained_var: 0.06204722821712494
          vf_loss: 0.01026190149390863
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,80,2680.91,80000,-3.9707,-2.96,-5.42,397.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-24_17-15-58
  done: false
  episode_len_mean: 396.33
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9632999999999594
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 203
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.818424802356296
          entropy_coeff: 0.009999999999999998
          kl: 0.012596428262930122
          policy_loss: 0.05100206567181481
          total_loss: 0.04605206607116593
          vf_explained_var: 0.21432800590991974
          vf_loss: 0.010045777600155108
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,81,2712.6,81000,-3.9633,-2.96,-5.42,396.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-24_17-16-29
  done: false
  episode_len_mean: 393.7
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9369999999999603
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 206
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8764325724707709
          entropy_coeff: 0.009999999999999998
          kl: 0.009488343554844292
          policy_loss: 0.10013972206248177
          total_loss: 0.09054149157471127
          vf_explained_var: 0.2312106043100357
          vf_loss: 0.006764357373079595
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,82,2744.39,82000,-3.937,-2.96,-5.42,393.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-24_17-17-01
  done: false
  episode_len_mean: 393.04
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.9303999999999597
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 2
  episodes_total: 208
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8893257604704963
          entropy_coeff: 0.009999999999999998
          kl: 0.007898934378177083
          policy_loss: -0.0849395215511322
          total_loss: -0.08935163484679329
          vf_explained_var: 0.23969775438308716
          vf_loss: 0.012481726762942142
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,83,2776.21,83000,-3.9304,-2.96,-5.42,393.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-24_17-17-53
  done: false
  episode_len_mean: 390.8
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.90799999999996
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 211
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8107988529735142
          entropy_coeff: 0.009999999999999998
          kl: 0.007125796632714549
          policy_loss: -0.10854787528514862
          total_loss: -0.10879669710993767
          vf_explained_var: 0.08140671253204346
          vf_loss: 0.01605544565245509
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,84,2827.71,84000,-3.908,-2.96,-5.42,390.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-24_17-18-25
  done: false
  episode_len_mean: 390.83
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.90829999999996
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 214
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7316614680820042
          entropy_coeff: 0.009999999999999998
          kl: 0.01088020911304868
          policy_loss: -0.03462058107058207
          total_loss: -0.03856164399120543
          vf_explained_var: -0.1332385241985321
          vf_loss: 0.010621497673532253
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,85,2859.73,85000,-3.9083,-2.96,-5.42,390.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-24_17-18-58
  done: false
  episode_len_mean: 388.44
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.884399999999961
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7942927718162536
          entropy_coeff: 0.009999999999999998
          kl: 0.007879428869502754
          policy_loss: 0.037854845739073224
          total_loss: 0.03292896441287464
          vf_explained_var: 0.11526079475879669
          vf_loss: 0.011022565308828941
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,86,2893.27,86000,-3.8844,-2.96,-5.42,388.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-24_17-19-33
  done: false
  episode_len_mean: 387.79
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.8778999999999617
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 220
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.664520760377248
          entropy_coeff: 0.009999999999999998
          kl: 0.0103889806266774
          policy_loss: 0.03430175847477383
          total_loss: 0.03225829799969991
          vf_explained_var: 0.010043938644230366
          vf_loss: 0.01197203576254348
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,87,2927.82,87000,-3.8779,-2.96,-5.42,387.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-24_17-20-07
  done: false
  episode_len_mean: 386.26
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.862599999999962
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 223
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6969765861829122
          entropy_coeff: 0.009999999999999998
          kl: 0.007016847394577224
          policy_loss: 0.033681086864736344
          total_loss: 0.030624007682005565
          vf_explained_var: 0.20668068528175354
          vf_loss: 0.012136545332355631
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,88,2961.81,88000,-3.8626,-2.96,-5.42,386.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-24_17-20-43
  done: false
  episode_len_mean: 384.58
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.8457999999999624
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 226
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6798178593317667
          entropy_coeff: 0.009999999999999998
          kl: 0.00708950841216457
          policy_loss: -0.0018362965848710802
          total_loss: -0.005163195398118761
          vf_explained_var: 0.19582489132881165
          vf_loss: 0.011676746709013565
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,89,2997.85,89000,-3.8458,-2.96,-5.42,384.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-24_17-21-19
  done: false
  episode_len_mean: 381.68
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.816799999999962
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 229
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6503515005111695
          entropy_coeff: 0.009999999999999998
          kl: 0.010373389350666134
          policy_loss: -0.10662673513094584
          total_loss: -0.10718972186247508
          vf_explained_var: 0.4066890776157379
          vf_loss: 0.01331476214238339
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,90,3033.8,90000,-3.8168,-2.96,-5.42,381.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-24_17-21-53
  done: false
  episode_len_mean: 380.42
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.8041999999999625
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 232
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6402185757954915
          entropy_coeff: 0.009999999999999998
          kl: 0.02028321486502453
          policy_loss: 0.0079342739449607
          total_loss: 0.010577237109343211
          vf_explained_var: 0.35505348443984985
          vf_loss: 0.013910961171819104
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,91,3068.09,91000,-3.8042,-2.96,-5.42,380.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-24_17-22-25
  done: false
  episode_len_mean: 379.44
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.794399999999963
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 235
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6576840864287483
          entropy_coeff: 0.009999999999999998
          kl: 0.009084089151707787
          policy_loss: -0.002367966953251097
          total_loss: -0.003026299840874142
          vf_explained_var: 0.2820069491863251
          vf_loss: 0.012469395145631602
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,92,3100.27,92000,-3.7944,-2.96,-5.42,379.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-24_17-22-56
  done: false
  episode_len_mean: 379.03
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7902999999999634
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 2
  episodes_total: 237
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.644471369849311
          entropy_coeff: 0.009999999999999998
          kl: 0.012507251672156074
          policy_loss: -0.13755821916792127
          total_loss: -0.13801268339157105
          vf_explained_var: 0.19205424189567566
          vf_loss: 0.011241406572581682
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,93,3130.92,93000,-3.7903,-2.96,-5.42,379.03




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-24_17-23-47
  done: false
  episode_len_mean: 378.58
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.7857999999999628
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 240
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5243391778733995
          entropy_coeff: 0.009999999999999998
          kl: 0.014586923439330313
          policy_loss: -0.1175384719338682
          total_loss: -0.11342946108844545
          vf_explained_var: 0.2405346781015396
          vf_loss: 0.013813928664765425
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,94,3181.73,94000,-3.7858,-2.92,-5.42,378.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-24_17-24-24
  done: false
  episode_len_mean: 375.52
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.755199999999964
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 243
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.463035680188073
          entropy_coeff: 0.009999999999999998
          kl: 0.00615409983910469
          policy_loss: -0.11290251654055383
          total_loss: -0.10888746538096004
          vf_explained_var: 0.10743492096662521
          vf_loss: 0.01630877290541927
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,95,3218.77,95000,-3.7552,-2.92,-5.42,375.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-24_17-24-58
  done: false
  episode_len_mean: 374.73
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.747299999999963
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 246
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4353684809472826
          entropy_coeff: 0.009999999999999998
          kl: 0.005628204707820336
          policy_loss: -0.11629379350278113
          total_loss: -0.11373525891039106
          vf_explained_var: 0.19727590680122375
          vf_loss: 0.014775261210484638
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,96,3252.53,96000,-3.7473,-2.92,-5.42,374.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-24_17-25-32
  done: false
  episode_len_mean: 372.32
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.7231999999999643
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 250
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.392179168595208
          entropy_coeff: 0.009999999999999998
          kl: 0.0055760031437268156
          policy_loss: -0.003275276472171148
          total_loss: 0.00023826981584231058
          vf_explained_var: 0.14949291944503784
          vf_loss: 0.015318199319558011
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,97,3286.72,97000,-3.7232,-2.92,-5.42,372.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-24_17-26-09
  done: false
  episode_len_mean: 369.6
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.695999999999964
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 253
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3842195060518052
          entropy_coeff: 0.009999999999999998
          kl: 0.005386141085412236
          policy_loss: 0.09370937512980568
          total_loss: 0.08897721179657513
          vf_explained_var: 0.3199026584625244
          vf_loss: 0.007064978504786268
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,98,3323.17,98000,-3.696,-2.92,-5.42,369.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-24_17-26-44
  done: false
  episode_len_mean: 367.7
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.6769999999999654
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 256
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3576907462543912
          entropy_coeff: 0.009999999999999998
          kl: 0.005124309586633973
          policy_loss: 0.05958597809076309
          total_loss: 0.05875471880038579
          vf_explained_var: 0.13046713173389435
          vf_loss: 0.01080001251021814
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,99,3358.83,99000,-3.677,-2.92,-5.42,367.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-24_17-27-20
  done: false
  episode_len_mean: 365.38
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.6537999999999653
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 259
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.409870809978909
          entropy_coeff: 0.009999999999999998
          kl: 0.004049224533436026
          policy_loss: 0.026214231385125054
          total_loss: 0.025121619552373887
          vf_explained_var: 0.044088736176490784
          vf_loss: 0.011468658425534765
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,100,3394.18,100000,-3.6538,-2.92,-5.42,365.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-24_17-27-55
  done: false
  episode_len_mean: 362.63
  episode_media: {}
  episode_reward_max: -2.9199999999999817
  episode_reward_mean: -3.6262999999999663
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 262
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3470113330417208
          entropy_coeff: 0.009999999999999998
          kl: 0.0067428655823439
          policy_loss: -0.09534078033434021
          total_loss: -0.09276913180947303
          vf_explained_var: 0.044485632330179214
          vf_loss: 0.014761669685443242
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,101,3429.23,101000,-3.6263,-2.92,-5.42,362.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-24_17-28-30
  done: false
  episode_len_mean: 358.9
  episode_media: {}
  episode_reward_max: -2.6899999999999866
  episode_reward_mean: -3.5889999999999667
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 266
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1459422449270884
          entropy_coeff: 0.009999999999999998
          kl: 0.007025026608004402
          policy_loss: 0.017537811232937708
          total_loss: 0.019012518889374202
          vf_explained_var: 0.13358953595161438
          vf_loss: 0.011600473078174724
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,102,3464.52,102000,-3.589,-2.69,-5.42,358.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-24_17-29-10
  done: false
  episode_len_mean: 356.21
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.562099999999967
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.099889971812566
          entropy_coeff: 0.009999999999999998
          kl: 0.010159748419837852
          policy_loss: 0.03471127334568236
          total_loss: 0.03544811548458205
          vf_explained_var: 0.2421187311410904
          vf_loss: 0.009806978712893195
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,103,3504.1,103000,-3.5621,-2.64,-5.42,356.21




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-24_17-30-11
  done: false
  episode_len_mean: 351.58
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -3.515799999999969
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 273
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1137027117941114
          entropy_coeff: 0.009999999999999998
          kl: 0.0070904010955696135
          policy_loss: 0.02143852954937352
          total_loss: 0.022765709128644732
          vf_explained_var: 0.43279486894607544
          vf_loss: 0.011118138829867045
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,104,3565.07,104000,-3.5158,-2.51,-5.42,351.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-24_17-30-52
  done: false
  episode_len_mean: 346.0
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -3.4599999999999693
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 277
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.065339728196462
          entropy_coeff: 0.009999999999999998
          kl: 0.007715537036472729
          policy_loss: 0.00863198083308008
          total_loss: 0.013912419146961636
          vf_explained_var: 0.294047474861145
          vf_loss: 0.01446908919347657
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,105,3606.27,105000,-3.46,-2.51,-5.42,346


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-24_17-31-35
  done: false
  episode_len_mean: 343.66
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -3.43659999999997
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 280
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9622137195534176
          entropy_coeff: 0.009999999999999998
          kl: 0.005674513378308809
          policy_loss: -0.09245403144094679
          total_loss: -0.09330782228045993
          vf_explained_var: 0.7141082882881165
          vf_loss: 0.0076910719171994265
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,106,3649.14,106000,-3.4366,-2.51,-5.42,343.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-24_17-32-15
  done: false
  episode_len_mean: 335.22
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.3521999999999723
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 284
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9891166932053036
          entropy_coeff: 0.009999999999999998
          kl: 0.007806313832621973
          policy_loss: 0.019082540604803296
          total_loss: 0.01767424667874972
          vf_explained_var: 0.7822338342666626
          vf_loss: 0.007000895286910236
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,107,3689.48,107000,-3.3522,-2.5,-5.42,335.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-24_17-32-57
  done: false
  episode_len_mean: 329.86
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.298599999999973
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 288
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9760125438372295
          entropy_coeff: 0.009999999999999998
          kl: 0.005934673972777442
          policy_loss: 0.05003137638171514
          total_loss: 0.04631010459529029
          vf_explained_var: 0.9136946797370911
          vf_loss: 0.004912191068029238
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,108,3731.48,108000,-3.2986,-2.5,-5.42,329.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-24_17-33-38
  done: false
  episode_len_mean: 325.39
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2538999999999736
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 292
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9531093888812595
          entropy_coeff: 0.009999999999999998
          kl: 0.007098655080053727
          policy_loss: 0.0001681199504269494
          total_loss: -0.000440245411462254
          vf_explained_var: 0.8516014218330383
          vf_loss: 0.007575091771367524
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,109,3772.29,109000,-3.2539,-2.5,-5.42,325.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-24_17-34-19
  done: false
  episode_len_mean: 319.16
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.191599999999976
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 296
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.8780286716090309
          entropy_coeff: 0.009999999999999998
          kl: 0.007587624878175821
          policy_loss: -0.005111232068803575
          total_loss: -0.006436163725124465
          vf_explained_var: 0.8354604244232178
          vf_loss: 0.006014888925063942
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,110,3813.17,110000,-3.1916,-2.5,-5.24,319.16




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-24_17-35-16
  done: false
  episode_len_mean: 312.71
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.1270999999999765
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 300
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9765633079740736
          entropy_coeff: 0.009999999999999998
          kl: 0.012989026825972397
          policy_loss: -0.05636517049537765
          total_loss: -0.05829371627834108
          vf_explained_var: 0.8611732721328735
          vf_loss: 0.005371203382189075
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,111,3870.09,111000,-3.1271,-2.5,-4.67,312.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-24_17-35-59
  done: false
  episode_len_mean: 308.96
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.0895999999999773
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 3
  episodes_total: 303
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0281468543741439
          entropy_coeff: 0.009999999999999998
          kl: 0.00559747968595702
          policy_loss: 0.10380515861842367
          total_loss: 0.10065126063095199
          vf_explained_var: 0.7547787427902222
          vf_loss: 0.0060649243105823794
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,112,3913.49,112000,-3.0896,-2.5,-4.34,308.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-24_17-36-41
  done: false
  episode_len_mean: 304.86
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.0485999999999778
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 307
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9562268833319346
          entropy_coeff: 0.009999999999999998
          kl: 0.005592174275245378
          policy_loss: -0.042760677470101253
          total_loss: -0.041704941458172266
          vf_explained_var: 0.5793823003768921
          vf_loss: 0.009556365727136532
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,113,3954.87,113000,-3.0486,-2.5,-4.34,304.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-24_17-37-22
  done: false
  episode_len_mean: 301.35
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.0134999999999796
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 311
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 0.9822284334235721
          entropy_coeff: 0.009999999999999998
          kl: 0.008387157479168501
          policy_loss: 0.029783033579587937
          total_loss: 0.0298242943154441
          vf_explained_var: 0.6412766575813293
          vf_loss: 0.008271293652554353
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,114,3996.7,114000,-3.0135,-2.5,-4.34,301.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-24_17-38-05
  done: false
  episode_len_mean: 297.26
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.9725999999999804
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 315
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0881352153089312
          entropy_coeff: 0.009999999999999998
          kl: 0.011695132257468781
          policy_loss: 0.01969976665245162
          total_loss: 0.021002574761708578
          vf_explained_var: 0.5539248585700989
          vf_loss: 0.00996391601446602
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,115,4039.02,115000,-2.9726,-2.5,-4.34,297.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-24_17-38-42
  done: false
  episode_len_mean: 294.38
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.9437999999999813
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 319
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2009255011876425
          entropy_coeff: 0.009999999999999998
          kl: 0.0045580701230366095
          policy_loss: 0.014606798502306144
          total_loss: 0.01626179518385066
          vf_explained_var: 0.42377763986587524
          vf_loss: 0.012798928490115536
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,116,4076.53,116000,-2.9438,-2.5,-4.34,294.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-24_17-39-24
  done: false
  episode_len_mean: 292.53
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.9252999999999822
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.168175028430091
          entropy_coeff: 0.009999999999999998
          kl: 0.010592597588051235
          policy_loss: -0.03978446755144331
          total_loss: -0.039235562044713235
          vf_explained_var: 0.2180643081665039
          vf_loss: 0.011225188585619132
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,117,4118.08,117000,-2.9253,-2.5,-4.34,292.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-24_17-40-04
  done: false
  episode_len_mean: 290.9
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.9089999999999816
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 326
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.187997665670183
          entropy_coeff: 0.009999999999999998
          kl: 0.010221039723748819
          policy_loss: 0.0024539595676792993
          total_loss: 0.003977697839339574
          vf_explained_var: 0.29895541071891785
          vf_loss: 0.01243351349193189
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,118,4158.2,118000,-2.909,-2.5,-4.34,290.9




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-24_17-40-58
  done: false
  episode_len_mean: 288.87
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.888699999999982
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 330
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2209360811445449
          entropy_coeff: 0.009999999999999998
          kl: 0.012564222645107235
          policy_loss: -0.006174597475263808
          total_loss: -0.002308225797282325
          vf_explained_var: 0.10310938209295273
          vf_loss: 0.014883112855669525
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,119,4212.22,119000,-2.8887,-2.5,-4.34,288.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-24_17-41-39
  done: false
  episode_len_mean: 287.02
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.8701999999999823
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 3
  episodes_total: 333
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.208504393365648
          entropy_coeff: 0.009999999999999998
          kl: 0.011139385166173613
          policy_loss: 0.049084901395771235
          total_loss: 0.04593245858947436
          vf_explained_var: 0.2619467079639435
          vf_loss: 0.00787523183454242
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,120,4252.98,120000,-2.8702,-2.5,-4.34,287.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-24_17-42-20
  done: false
  episode_len_mean: 282.47
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.824699999999984
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 337
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1519361085361906
          entropy_coeff: 0.009999999999999998
          kl: 0.013592006371928988
          policy_loss: 0.0218105576104588
          total_loss: 0.025863271537754272
          vf_explained_var: 0.07181645929813385
          vf_loss: 0.014281898302336534
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,121,4294.22,121000,-2.8247,-2.5,-4.34,282.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-24_17-43-01
  done: false
  episode_len_mean: 279.25
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.792499999999984
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 4
  episodes_total: 341
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1621668921576607
          entropy_coeff: 0.009999999999999998
          kl: 0.007037753815087516
          policy_loss: -0.005221351650026109
          total_loss: -0.0023321232861942716
          vf_explained_var: 0.09177980571985245
          vf_loss: 0.0138428567080862
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,122,4334.86,122000,-2.7925,-2.5,-3.62,279.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-24_17-43-41
  done: false
  episode_len_mean: 277.86
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.7785999999999844
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 3
  episodes_total: 344
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1821018894513449
          entropy_coeff: 0.009999999999999998
          kl: 0.005383511476570706
          policy_loss: 0.008546630375915104
          total_loss: 0.0057145305805736115
          vf_explained_var: 0.19947123527526855
          vf_loss: 0.008477907612298925
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,123,4374.88,123000,-2.7786,-2.5,-3.62,277.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-24_17-44-18
  done: false
  episode_len_mean: 276.52
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.7651999999999854
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 348
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.159569383992089
          entropy_coeff: 0.009999999999999998
          kl: 0.009080847555223433
          policy_loss: -0.011551780005296072
          total_loss: -0.006573272082540724
          vf_explained_var: 0.05421484261751175
          vf_loss: 0.01571223099405567
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,124,4412.03,124000,-2.7652,-2.5,-3.35,276.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-24_17-44-56
  done: false
  episode_len_mean: 275.67
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.7566999999999853
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 351
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1327656428019206
          entropy_coeff: 0.009999999999999998
          kl: 0.010604837547777003
          policy_loss: 0.04367505196068022
          total_loss: 0.043728173772494
          vf_explained_var: -0.27300626039505005
          vf_loss: 0.010374147278101493
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,125,4449.55,125000,-2.7567,-2.5,-3.35,275.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-24_17-45-32
  done: false
  episode_len_mean: 274.98
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.7497999999999854
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 354
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.101356189780765
          entropy_coeff: 0.009999999999999998
          kl: 0.008246860395764857
          policy_loss: -0.11026283941335148
          total_loss: -0.10627403921551175
          vf_explained_var: 0.16283008456230164
          vf_loss: 0.014219552899400393
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,126,4486.27,126000,-2.7498,-2.5,-3.35,274.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-24_17-46-12
  done: false
  episode_len_mean: 273.01
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -2.7300999999999855
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 358
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0946107135878669
          entropy_coeff: 0.009999999999999998
          kl: 0.00885054031432399
          policy_loss: 0.0027833753989802467
          total_loss: 0.005014502753814061
          vf_explained_var: 0.20498424768447876
          vf_loss: 0.012337122878266705
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,127,4525.96,127000,-2.7301,-2.5,-3.24,273.01




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-24_17-47-12
  done: false
  episode_len_mean: 270.52
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.7051999999999854
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 4
  episodes_total: 362
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0485153860516019
          entropy_coeff: 0.009999999999999998
          kl: 0.012460568838747656
          policy_loss: -0.02525692441397243
          total_loss: -0.020874302171998555
          vf_explained_var: 0.11137615889310837
          vf_loss: 0.013684994034055207
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,128,4585.94,128000,-2.7052,-2.28,-3.18,270.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-24_17-47-54
  done: false
  episode_len_mean: 268.68
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.686799999999987
  episode_reward_min: -3.089999999999978
  episodes_this_iter: 4
  episodes_total: 366
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0591385417514376
          entropy_coeff: 0.009999999999999998
          kl: 0.007825437876504913
          policy_loss: -0.004322337152229415
          total_loss: -0.0017796089665757285
          vf_explained_var: 0.3067431151866913
          vf_loss: 0.012391308995170726
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,129,4627.9,129000,-2.6868,-2.28,-3.09,268.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-24_17-48-37
  done: false
  episode_len_mean: 267.87
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6786999999999863
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 370
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 0.9983882354365454
          entropy_coeff: 0.009999999999999998
          kl: 0.010649702226679993
          policy_loss: 0.04057371566692988
          total_loss: 0.04351914260122511
          vf_explained_var: 0.2440096139907837
          vf_loss: 0.011918422093407975
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,130,4670.4,130000,-2.6787,-2.28,-3.04,267.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-24_17-49-18
  done: false
  episode_len_mean: 267.28
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6727999999999863
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 374
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0072345323032803
          entropy_coeff: 0.009999999999999998
          kl: 0.0078013431466375845
          policy_loss: -0.0006985438366731007
          total_loss: 0.0058547866841157275
          vf_explained_var: 0.09138312935829163
          vf_loss: 0.015885156258526774
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,131,4711.96,131000,-2.6728,-2.28,-3.04,267.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-24_17-50-00
  done: false
  episode_len_mean: 266.75
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.667499999999987
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 378
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 0.9650641865200467
          entropy_coeff: 0.009999999999999998
          kl: 0.004552894192381392
          policy_loss: 0.009588044716252221
          total_loss: 0.011756607227855258
          vf_explained_var: 0.37908244132995605
          vf_loss: 0.011387033998552296
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,132,4754.13,132000,-2.6675,-2.28,-3.04,266.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-24_17-50-44
  done: false
  episode_len_mean: 266.73
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.667299999999987
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 382
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04746093750000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.0260376354058585
          entropy_coeff: 0.009999999999999998
          kl: 0.012399855526513587
          policy_loss: 0.014543276362948948
          total_loss: 0.01735994451575809
          vf_explained_var: 0.36227014660835266
          vf_loss: 0.01248853759219249
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,133,4797.54,133000,-2.6673,-2.28,-3.04,266.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-24_17-51-27
  done: false
  episode_len_mean: 266.48
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.664799999999987
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 386
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04746093750000002
          cur_lr: 5.000000000000001e-05
          entropy: 0.9613913973172505
          entropy_coeff: 0.009999999999999998
          kl: 0.009598194071807677
          policy_loss: 0.014223288910256492
          total_loss: 0.016164883391724692
          vf_explained_var: 0.4884043335914612
          vf_loss: 0.011099969988895788
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,134,4840.31,134000,-2.6648,-2.28,-3.04,266.48




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-24_17-52-24
  done: false
  episode_len_mean: 265.84
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6583999999999866
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 390
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04746093750000002
          cur_lr: 5.000000000000001e-05
          entropy: 0.9756399002340105
          entropy_coeff: 0.009999999999999998
          kl: 0.014585739545405986
          policy_loss: 0.047009819166527855
          total_loss: 0.04779498945507738
          vf_explained_var: 0.6156379580497742
          vf_loss: 0.009849321097135544
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,135,4897.56,135000,-2.6584,-2.27,-3.04,265.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-24_17-53-10
  done: false
  episode_len_mean: 265.65
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6564999999999865
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 394
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04746093750000002
          cur_lr: 5.000000000000001e-05
          entropy: 0.9902137491438124
          entropy_coeff: 0.009999999999999998
          kl: 0.01772868671244479
          policy_loss: -0.014671327773895529
          total_loss: -0.015019002474016614
          vf_explained_var: 0.6860846877098083
          vf_loss: 0.008713041582248277
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,136,4943.31,136000,-2.6565,-2.27,-3.04,265.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-24_17-53-52
  done: false
  episode_len_mean: 265.84
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6583999999999866
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 3
  episodes_total: 397
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04746093750000002
          cur_lr: 5.000000000000001e-05
          entropy: 0.9798180752330357
          entropy_coeff: 0.009999999999999998
          kl: 0.03321244709125687
          policy_loss: -0.08619063943624497
          total_loss: -0.0862171631720331
          vf_explained_var: 0.7126585841178894
          vf_loss: 0.008195360693045788
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,137,4985.86,137000,-2.6584,-2.27,-3.04,265.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-24_17-54-36
  done: false
  episode_len_mean: 265.64
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6563999999999868
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 401
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07119140624999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.0411282340685526
          entropy_coeff: 0.009999999999999998
          kl: 0.030852622433979387
          policy_loss: -0.042833264751566784
          total_loss: -0.04276033341884613
          vf_explained_var: 0.7163006067276001
          vf_loss: 0.008287774321312705
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,138,5029.15,138000,-2.6564,-2.27,-3.01,265.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-24_17-55-19
  done: false
  episode_len_mean: 265.27
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6526999999999874
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 405
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8967467824618022
          entropy_coeff: 0.009999999999999998
          kl: 0.008396487159663164
          policy_loss: 0.028801405512624317
          total_loss: 0.02923761291636361
          vf_explained_var: 0.7346905469894409
          vf_loss: 0.008507040696632532
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,139,5072.87,139000,-2.6527,-2.27,-3.01,265.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-24_17-56-03
  done: false
  episode_len_mean: 265.48
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6547999999999865
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 409
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0373616788122388
          entropy_coeff: 0.009999999999999998
          kl: 0.007316633064359242
          policy_loss: 0.017546717615591154
          total_loss: 0.015457405398289362
          vf_explained_var: 0.7747606039047241
          vf_loss: 0.00750298133223421
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,140,5116.25,140000,-2.6548,-2.27,-3.01,265.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-24_17-56-47
  done: false
  episode_len_mean: 265.36
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.653599999999987
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 413
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8858751906288995
          entropy_coeff: 0.009999999999999998
          kl: 0.006978034788807482
          policy_loss: 0.07443836430708567
          total_loss: 0.07392884981301096
          vf_explained_var: 0.7857964634895325
          vf_loss: 0.007604073034599423
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,141,5160.44,141000,-2.6536,-2.27,-3.01,265.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-24_17-57-31
  done: false
  episode_len_mean: 264.72
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6471999999999882
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 417
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8375332944922977
          entropy_coeff: 0.009999999999999998
          kl: 0.017952937287790842
          policy_loss: 0.01768139542804824
          total_loss: 0.019514537768231498
          vf_explained_var: 0.6862900853157043
          vf_loss: 0.008291332196030352
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,142,5204.69,142000,-2.6472,-2.27,-3.01,264.72




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-24_17-58-32
  done: false
  episode_len_mean: 263.8
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6379999999999875
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 421
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8158069127135806
          entropy_coeff: 0.009999999999999998
          kl: 0.010923353767039935
          policy_loss: 0.009642085267437829
          total_loss: 0.011236311495304107
          vf_explained_var: 0.7310546636581421
          vf_loss: 0.008585821671618355
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,143,5265.01,143000,-2.638,-2.27,-3.01,263.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-24_17-59-14
  done: false
  episode_len_mean: 262.97
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.6296999999999877
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 425
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8572546700636546
          entropy_coeff: 0.009999999999999998
          kl: 0.009865848593620106
          policy_loss: 0.024282973094118965
          total_loss: 0.023953624980317223
          vf_explained_var: 0.7937200665473938
          vf_loss: 0.007189653182609214
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,144,5307.4,144000,-2.6297,-2.27,-3.01,262.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-24_17-59-57
  done: false
  episode_len_mean: 262.5
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.624999999999988
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 429
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.683619909816318
          entropy_coeff: 0.009999999999999998
          kl: 0.007869673589930275
          policy_loss: -0.021930856671598224
          total_loss: -0.017044508539968067
          vf_explained_var: 0.513982355594635
          vf_loss: 0.010882168014844259
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,145,5350.01,145000,-2.625,-2.27,-3.01,262.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-24_18-00-38
  done: false
  episode_len_mean: 262.21
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.622099999999987
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 3
  episodes_total: 432
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.6715133779578739
          entropy_coeff: 0.009999999999999998
          kl: 0.017122307736742098
          policy_loss: -0.08932246300909254
          total_loss: -0.08517586853769091
          vf_explained_var: 0.5450673699378967
          vf_loss: 0.009033287435563074
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,146,5391.36,146000,-2.6221,-2.27,-3.01,262.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-24_18-01-20
  done: false
  episode_len_mean: 261.51
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.615099999999988
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 436
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.48505045572916666
          entropy_coeff: 0.009999999999999998
          kl: 0.005001270905936739
          policy_loss: -0.024189243382877774
          total_loss: -0.017486223330100376
          vf_explained_var: 0.4374593198299408
          vf_loss: 0.011019452816496293
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,147,5433.45,147000,-2.6151,-2.27,-3.01,261.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-24_18-02-03
  done: false
  episode_len_mean: 261.18
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.611799999999988
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 440
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.4942068335082796
          entropy_coeff: 0.009999999999999998
          kl: 0.007518640750498958
          policy_loss: -0.02350713171892696
          total_loss: -0.017114989625083076
          vf_explained_var: 0.3791424334049225
          vf_loss: 0.01053131880859534
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,148,5476.66,148000,-2.6118,-2.27,-3.01,261.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-24_18-02-47
  done: false
  episode_len_mean: 259.72
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.5971999999999884
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 444
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.6424428986178504
          entropy_coeff: 0.009999999999999998
          kl: 0.014196296456861527
          policy_loss: -0.02432590979668829
          total_loss: -0.01975972114337815
          vf_explained_var: 0.45808181166648865
          vf_loss: 0.009474637519775165
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,149,5520.47,149000,-2.5972,-2.27,-3.01,259.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-24_18-03-29
  done: false
  episode_len_mean: 257.81
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.5780999999999894
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 448
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.106787109375
          cur_lr: 5.000000000000001e-05
          entropy: 0.7007103039158715
          entropy_coeff: 0.009999999999999998
          kl: 0.004109175260684831
          policy_loss: -0.08049130232797728
          total_loss: -0.076582566400369
          vf_explained_var: 0.5261440873146057
          vf_loss: 0.010477027762681246
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,150,5562.17,150000,-2.5781,-2.27,-2.97,257.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-24_18-04-31
  done: false
  episode_len_mean: 255.6
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.555999999999989
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 5
  episodes_total: 453
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.7100358075565762
          entropy_coeff: 0.009999999999999998
          kl: 0.008251285209325311
          policy_loss: -0.027175287240081364
          total_loss: -0.021330060395929547
          vf_explained_var: 0.4822085201740265
          vf_loss: 0.012505018628305858
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,151,5624.34,151000,-2.556,-2.14,-2.93,255.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-24_18-05-16
  done: false
  episode_len_mean: 254.15
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5414999999999894
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 457
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.6739210274484423
          entropy_coeff: 0.009999999999999998
          kl: 0.014026675639216082
          policy_loss: -0.01801495146420267
          total_loss: -0.011988199005524317
          vf_explained_var: 0.3957061767578125
          vf_loss: 0.012017027092062765
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,152,5669.29,152000,-2.5415,-2.14,-2.85,254.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-24_18-06-00
  done: false
  episode_len_mean: 254.01
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5400999999999896
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 461
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.6988954848713345
          entropy_coeff: 0.009999999999999998
          kl: 0.009053945427738618
          policy_loss: 0.006618069443437788
          total_loss: 0.009740485499302546
          vf_explained_var: 0.5820479393005371
          vf_loss: 0.00962794643516342
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,153,5712.66,153000,-2.5401,-2.14,-2.85,254.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-24_18-06-44
  done: false
  episode_len_mean: 253.79
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.53789999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 465
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.5236994163857566
          entropy_coeff: 0.009999999999999998
          kl: 0.0051606747905621675
          policy_loss: -0.01014539214471976
          total_loss: -0.0049982776244481405
          vf_explained_var: 0.4969382882118225
          vf_loss: 0.010108562445061075
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,154,5757.12,154000,-2.5379,-2.14,-2.85,253.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-24_18-07-29
  done: false
  episode_len_mean: 253.2
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.53199999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 469
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.46682306230068205
          entropy_coeff: 0.009999999999999998
          kl: 0.006260693387901976
          policy_loss: 0.0009567857616477542
          total_loss: 0.006592867854568693
          vf_explained_var: 0.3836989104747772
          vf_loss: 0.00997003053004543
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,155,5802.36,155000,-2.532,-2.14,-2.85,253.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-24_18-08-13
  done: false
  episode_len_mean: 252.8
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52799999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 473
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.5421169201532999
          entropy_coeff: 0.009999999999999998
          kl: 0.007514028763621071
          policy_loss: 0.004338190125094519
          total_loss: 0.010533926718764835
          vf_explained_var: 0.37414833903312683
          vf_loss: 0.011215706097169055
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,156,5846.26,156000,-2.528,-2.14,-2.85,252.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-24_18-08-57
  done: false
  episode_len_mean: 252.4
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5239999999999903
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 477
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 0.7155112955305312
          entropy_coeff: 0.009999999999999998
          kl: 0.012497229126465035
          policy_loss: 0.05873267667161094
          total_loss: 0.06177165508270264
          vf_explained_var: 0.49540942907333374
          vf_loss: 0.009526822152030138
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,157,5890.21,157000,-2.524,-2.14,-2.85,252.4




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-24_18-09-56
  done: false
  episode_len_mean: 252.32
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52319999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 481
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.0498980946011014
          entropy_coeff: 0.009999999999999998
          kl: 0.021735252785099173
          policy_loss: 0.022167109325528143
          total_loss: 0.025582481920719147
          vf_explained_var: 0.3926529884338379
          vf_loss: 0.01275383083977633
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,158,5948.69,158000,-2.5232,-2.14,-2.85,252.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-24_18-10-42
  done: false
  episode_len_mean: 251.98
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.51979999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 485
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 0.7214175201124615
          entropy_coeff: 0.009999999999999998
          kl: 0.04337384757117696
          policy_loss: 0.00882572444776694
          total_loss: 0.016481233512361845
          vf_explained_var: 0.3969510495662689
          vf_loss: 0.011395856882962916
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,159,5995.27,159000,-2.5198,-2.14,-3.1,251.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-24_18-11-30
  done: false
  episode_len_mean: 250.78
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5077999999999903
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 489
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 0.43573430279890696
          entropy_coeff: 0.009999999999999998
          kl: 0.003616415219924028
          policy_loss: -0.018924654606315823
          total_loss: -0.011842365397347345
          vf_explained_var: 0.36281895637512207
          vf_loss: 0.011005171409083737
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,160,6042.73,160000,-2.5078,-2.14,-3.1,250.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-24_18-12-17
  done: false
  episode_len_mean: 249.67
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4966999999999904
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 5
  episodes_total: 494
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 0.5599145915773179
          entropy_coeff: 0.009999999999999998
          kl: 0.017966050177179227
          policy_loss: -0.0032124601304531096
          total_loss: 0.005555350912941827
          vf_explained_var: 0.40521302819252014
          vf_loss: 0.01328777733983265
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,161,6089.46,161000,-2.4967,-2.14,-3.1,249.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-24_18-13-03
  done: false
  episode_len_mean: 248.26
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4825999999999913
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 498
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 0.4355618490113152
          entropy_coeff: 0.009999999999999998
          kl: 0.009203252406032586
          policy_loss: 0.012875797268417146
          total_loss: 0.017221826397710378
          vf_explained_var: 0.46810510754585266
          vf_loss: 0.008148830151185393
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,162,6135.6,162000,-2.4826,-2.14,-3.1,248.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-24_18-13-50
  done: false
  episode_len_mean: 246.64
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4663999999999913
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 5
  episodes_total: 503
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 0.353802200489574
          entropy_coeff: 0.009999999999999998
          kl: 0.006888029385904974
          policy_loss: -0.047145728684133956
          total_loss: -0.039124716652764215
          vf_explained_var: 0.42325133085250854
          vf_loss: 0.011145283716420333
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,163,6183.26,163000,-2.4664,-2.14,-3.1,246.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-24_18-14-39
  done: false
  episode_len_mean: 245.04
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.450399999999992
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 507
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 0.5716170893775092
          entropy_coeff: 0.009999999999999998
          kl: 0.043390438627971324
          policy_loss: 0.01519586518406868
          total_loss: 0.02107302492691411
          vf_explained_var: 0.39232733845710754
          vf_loss: 0.008986962369332711
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,164,6232,164000,-2.4504,-2.14,-3.1,245.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-24_18-15-43
  done: false
  episode_len_mean: 243.99
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4398999999999917
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 511
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 0.6972910596264733
          entropy_coeff: 0.009999999999999998
          kl: 0.012051189197231338
          policy_loss: -0.06768347438838747
          total_loss: -0.06395196960204177
          vf_explained_var: 0.21739543974399567
          vf_loss: 0.009618583383659523
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,165,6295.89,165000,-2.4399,-2.04,-3.1,243.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-24_18-16-29
  done: false
  episode_len_mean: 243.08
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.430799999999992
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 5
  episodes_total: 516
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 0.685716383655866
          entropy_coeff: 0.009999999999999998
          kl: 0.02130219585840669
          policy_loss: -0.020915252218643824
          total_loss: -0.014690649426645703
          vf_explained_var: 0.2789921760559082
          vf_loss: 0.011162406345829368
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,166,6342.15,166000,-2.4308,-2.04,-3.1,243.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-24_18-17-14
  done: false
  episode_len_mean: 242.71
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4270999999999923
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 520
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 0.6953881111409929
          entropy_coeff: 0.009999999999999998
          kl: 0.011316420348609846
          policy_loss: 0.03008976255853971
          total_loss: 0.03510351197587119
          vf_explained_var: 0.08341585844755173
          vf_loss: 0.010438190162595775
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,167,6386.57,167000,-2.4271,-2.04,-3.1,242.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-24_18-17-50
  done: false
  episode_len_mean: 243.93
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4392999999999923
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 523
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.3440416117509206
          entropy_coeff: 0.009999999999999998
          kl: 0.01656627078421587
          policy_loss: 0.013688002857897017
          total_loss: 0.009950917462507885
          vf_explained_var: 0.20408771932125092
          vf_loss: 0.007464354956108663
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,168,6423.01,168000,-2.4393,-2.04,-3.34,243.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-24_18-18-26
  done: false
  episode_len_mean: 246.09
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4608999999999916
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 526
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.5883295820819008
          entropy_coeff: 0.009999999999999998
          kl: 0.028531440946239172
          policy_loss: -0.0374173288544019
          total_loss: -0.04092810137404336
          vf_explained_var: -0.047241225838661194
          vf_loss: 0.008516427446092064
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,169,6458.22,169000,-2.4609,-2.04,-3.88,246.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-24_18-19-08
  done: false
  episode_len_mean: 245.78
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4577999999999918
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 530
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.1042656401793163
          entropy_coeff: 0.009999999999999998
          kl: 0.016212066891007577
          policy_loss: -0.005791333773069911
          total_loss: -0.0009507518261671066
          vf_explained_var: 0.06960925459861755
          vf_loss: 0.012596590289225182
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,170,6500.24,170000,-2.4578,-2.04,-3.88,245.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-24_18-19-51
  done: false
  episode_len_mean: 244.77
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4476999999999918
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 534
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 0.771518760919571
          entropy_coeff: 0.009999999999999998
          kl: 0.005762039960485133
          policy_loss: -0.059411095827817915
          total_loss: -0.0535414833161566
          vf_explained_var: 0.13036346435546875
          vf_loss: 0.012416668194863532
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,171,6543.76,171000,-2.4477,-2.04,-3.88,244.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-24_18-20-38
  done: false
  episode_len_mean: 243.76
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4375999999999918
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 539
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 0.5442025641600291
          entropy_coeff: 0.009999999999999998
          kl: 0.0016776349945920464
          policy_loss: -0.018327779157294166
          total_loss: -0.0070849294463793434
          vf_explained_var: 0.11756188422441483
          vf_loss: 0.01634477178255717
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,172,6590.34,172000,-2.4376,-2.04,-3.88,243.76




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-24_18-21-43
  done: false
  episode_len_mean: 243.15
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.431499999999992
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 543
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.7447093788120481
          entropy_coeff: 0.009999999999999998
          kl: 0.009720996573181396
          policy_loss: 0.020208965780006513
          total_loss: 0.026783605996105405
          vf_explained_var: 0.12714186310768127
          vf_loss: 0.01303637172612879
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,173,6655.14,173000,-2.4315,-1.93,-3.88,243.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-24_18-22-30
  done: false
  episode_len_mean: 242.49
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.424899999999992
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 547
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.6768995162513521
          entropy_coeff: 0.009999999999999998
          kl: 0.005927156011372898
          policy_loss: -0.07428366074131595
          total_loss: -0.0664145083891021
          vf_explained_var: 0.11570234596729279
          vf_loss: 0.014037346394939556
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,174,6702.47,174000,-2.4249,-1.93,-3.88,242.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-24_18-23-16
  done: false
  episode_len_mean: 242.04
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4203999999999923
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 552
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.6551132162412008
          entropy_coeff: 0.009999999999999998
          kl: 0.008220623381086591
          policy_loss: 0.002272816664642758
          total_loss: 0.011050166189670562
          vf_explained_var: 0.16164565086364746
          vf_loss: 0.014495202309141557
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,175,6748.65,175000,-2.4204,-1.93,-3.88,242.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-24_18-24-03
  done: false
  episode_len_mean: 241.67
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4166999999999925
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 556
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.6416227092345556
          entropy_coeff: 0.009999999999999998
          kl: 0.010181716552633209
          policy_loss: 0.03159656665391392
          total_loss: 0.03927845996287134
          vf_explained_var: 0.03232482820749283
          vf_loss: 0.013066059061222607
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,176,6795.36,176000,-2.4167,-1.93,-3.88,241.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-24_18-24-51
  done: false
  episode_len_mean: 240.65
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4064999999999923
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 560
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.5932268980476592
          entropy_coeff: 0.009999999999999998
          kl: 0.005960351305308128
          policy_loss: -0.0387328140437603
          total_loss: -0.0301310362915198
          vf_explained_var: 0.07264615595340729
          vf_loss: 0.013929875774516
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,177,6843.61,177000,-2.4065,-1.93,-3.88,240.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-24_18-25-39
  done: false
  episode_len_mean: 239.54
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.395399999999993
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 565
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 0.5061533119943407
          entropy_coeff: 0.009999999999999998
          kl: 0.003545419634286557
          policy_loss: 0.008809021694792642
          total_loss: 0.021028311798969905
          vf_explained_var: 0.10342660546302795
          vf_loss: 0.01692144920428594
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,178,6891.3,178000,-2.3954,-1.93,-3.88,239.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-24_18-26-26
  done: false
  episode_len_mean: 238.99
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3898999999999933
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 569
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05068216323852537
          cur_lr: 5.000000000000001e-05
          entropy: 0.5647717548741235
          entropy_coeff: 0.009999999999999998
          kl: 0.018867766631430367
          policy_loss: 0.002422932121488783
          total_loss: 0.01010970092482037
          vf_explained_var: 0.1470825970172882
          vf_loss: 0.012378226934621732
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,179,6938.49,179000,-2.3899,-1.93,-3.88,238.99




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-24_18-27-31
  done: false
  episode_len_mean: 237.65
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3764999999999934
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 574
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05068216323852537
          cur_lr: 5.000000000000001e-05
          entropy: 0.5846954984797372
          entropy_coeff: 0.009999999999999998
          kl: 0.007698355540647681
          policy_loss: -0.010513650212022994
          total_loss: 0.00012157393826378716
          vf_explained_var: 0.11926359683275223
          vf_loss: 0.016092007596873573
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,180,7003,180000,-2.3765,-1.93,-3.88,237.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-24_18-28-19
  done: false
  episode_len_mean: 237.21
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.372099999999993
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 578
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05068216323852537
          cur_lr: 5.000000000000001e-05
          entropy: 0.5822989016771316
          entropy_coeff: 0.009999999999999998
          kl: 0.004387984636060733
          policy_loss: 0.019716133342848885
          total_loss: 0.02697579935193062
          vf_explained_var: 0.1094142347574234
          vf_loss: 0.012860264587733481
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,181,7051.29,181000,-2.3721,-1.93,-3.88,237.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-24_18-29-06
  done: false
  episode_len_mean: 235.61
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.356099999999994
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 582
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025341081619262684
          cur_lr: 5.000000000000001e-05
          entropy: 0.6218256460295783
          entropy_coeff: 0.009999999999999998
          kl: 0.004442785324668977
          policy_loss: -0.057726610203584036
          total_loss: -0.05052063655522135
          vf_explained_var: 0.1021982803940773
          vf_loss: 0.013311646816631158
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,182,7098.8,182000,-2.3561,-1.93,-3.88,235.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-24_18-29-53
  done: false
  episode_len_mean: 235.64
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3563999999999936
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 587
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.611530237727695
          entropy_coeff: 0.009999999999999998
          kl: 0.006173793757474992
          policy_loss: 0.014319581869575713
          total_loss: 0.02215326502919197
          vf_explained_var: 0.0764765590429306
          vf_loss: 0.013870760105136368
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,183,7145.78,183000,-2.3564,-1.93,-3.88,235.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-24_18-30-40
  done: false
  episode_len_mean: 236.05
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3604999999999934
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 591
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.6397627022531297
          entropy_coeff: 0.009999999999999998
          kl: 0.011090666244466026
          policy_loss: 0.024730047004090414
          total_loss: 0.03215093318786886
          vf_explained_var: 0.041090354323387146
          vf_loss: 0.013677989039570094
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 18400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,184,7192.04,184000,-2.3605,-1.93,-3.88,236.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-24_18-31-28
  done: false
  episode_len_mean: 235.97
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.359699999999994
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 595
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.5213086588515176
          entropy_coeff: 0.009999999999999998
          kl: 0.018825208353430076
          policy_loss: -0.12184111624956132
          total_loss: -0.10921032288008266
          vf_explained_var: 0.08408844470977783
          vf_loss: 0.01760535804140899
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,185,7240.09,185000,-2.3597,-1.93,-3.88,235.97




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-24_18-32-32
  done: false
  episode_len_mean: 235.98
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3597999999999937
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 600
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.6093469076686435
          entropy_coeff: 0.009999999999999998
          kl: 0.018334388431491
          policy_loss: 0.012642921341790094
          total_loss: 0.021058375967873466
          vf_explained_var: 0.1292044073343277
          vf_loss: 0.014276618748489353
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,186,7304.13,186000,-2.3598,-1.93,-3.88,235.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-24_18-33-22
  done: false
  episode_len_mean: 236.21
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3620999999999936
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 604
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.6075445857312944
          entropy_coeff: 0.009999999999999998
          kl: 0.007863938145296364
          policy_loss: -0.023590305282009973
          total_loss: -0.014848218361536662
          vf_explained_var: 0.08127357810735703
          vf_loss: 0.014717889597846402
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,187,7353.72,187000,-2.3621,-1.93,-3.88,236.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-24_18-34-07
  done: false
  episode_len_mean: 236.47
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3646999999999934
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 609
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.6085451861222585
          entropy_coeff: 0.009999999999999998
          kl: 0.014350235950751116
          policy_loss: -0.02486768290400505
          total_loss: -0.013692852440807554
          vf_explained_var: 0.12511098384857178
          vf_loss: 0.01707845674827695
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 18800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,188,7399.63,188000,-2.3647,-1.93,-3.88,236.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-24_18-34-54
  done: false
  episode_len_mean: 236.46
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.364599999999993
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 613
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.632629966073566
          entropy_coeff: 0.009999999999999998
          kl: 0.007767911290743454
          policy_loss: 0.01837307032611635
          total_loss: 0.024386065949996313
          vf_explained_var: 0.17588172852993011
          vf_loss: 0.012240872790830002
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,189,7445.64,189000,-2.3646,-1.93,-3.88,236.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-24_18-35-41
  done: false
  episode_len_mean: 236.32
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3631999999999933
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 617
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 0.6284446514315075
          entropy_coeff: 0.009999999999999998
          kl: 0.022638851978302277
          policy_loss: 0.014246402680873871
          total_loss: 0.018210846682389578
          vf_explained_var: 0.07262828946113586
          vf_loss: 0.009962041580325199
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 19000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,190,7492.66,190000,-2.3632,-1.93,-3.88,236.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-24_18-36-27
  done: false
  episode_len_mean: 234.94
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.349399999999994
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 622
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019005811214447027
          cur_lr: 5.000000000000001e-05
          entropy: 0.7676175786389245
          entropy_coeff: 0.009999999999999998
          kl: 0.01647363979936548
          policy_loss: -0.01976741510960791
          total_loss: -0.009775611261526743
          vf_explained_var: 0.10101381689310074
          vf_loss: 0.017354884184896946
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,191,7538.71,191000,-2.3494,-1.93,-3.88,234.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-24_18-37-11
  done: false
  episode_len_mean: 231.57
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3156999999999943
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 626
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019005811214447027
          cur_lr: 5.000000000000001e-05
          entropy: 0.9145564324325985
          entropy_coeff: 0.009999999999999998
          kl: 0.020015669224046444
          policy_loss: 0.0006752345297071668
          total_loss: 0.005043004535966449
          vf_explained_var: 0.16429084539413452
          vf_loss: 0.01313291990922557
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,192,7582.79,192000,-2.3157,-1.93,-2.71,231.57




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-24_18-38-11
  done: false
  episode_len_mean: 231.03
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3102999999999945
  episode_reward_min: -2.7899999999999845
  episodes_this_iter: 4
  episodes_total: 630
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028508716821670534
          cur_lr: 5.000000000000001e-05
          entropy: 0.9272669712702434
          entropy_coeff: 0.009999999999999998
          kl: 0.01082726882665952
          policy_loss: 0.017894360837009217
          total_loss: 0.022502647588650386
          vf_explained_var: 0.13510136306285858
          vf_loss: 0.013572282685587803
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,193,7642.52,193000,-2.3103,-1.93,-2.79,231.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-24_18-38-55
  done: false
  episode_len_mean: 232.07
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3206999999999947
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 634
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028508716821670534
          cur_lr: 5.000000000000001e-05
          entropy: 0.9188309093316396
          entropy_coeff: 0.009999999999999998
          kl: 0.01632054834247138
          policy_loss: -0.00565972559981876
          total_loss: -0.001243094354867935
          vf_explained_var: 0.21287254989147186
          vf_loss: 0.013139661215245724
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,194,7686.65,194000,-2.3207,-1.93,-2.85,232.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-24_18-39-31
  done: false
  episode_len_mean: 234.73
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.347299999999994
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 637
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028508716821670534
          cur_lr: 5.000000000000001e-05
          entropy: 1.316010112894906
          entropy_coeff: 0.009999999999999998
          kl: 0.08040083662320312
          policy_loss: -0.01080848773320516
          total_loss: -0.012912322415245904
          vf_explained_var: 0.4594160318374634
          vf_loss: 0.008764138305559754
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,195,7723.4,195000,-2.3473,-1.93,-3.37,234.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-24_18-40-12
  done: false
  episode_len_mean: 237.08
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.3707999999999934
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 640
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0427630752325058
          cur_lr: 5.000000000000001e-05
          entropy: 1.393950140476227
          entropy_coeff: 0.009999999999999998
          kl: 0.05706242080901611
          policy_loss: 0.007871840397516887
          total_loss: 0.0028365327252282036
          vf_explained_var: 0.69575434923172
          vf_loss: 0.006464031597392427
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,196,7763.57,196000,-2.3708,-1.99,-3.37,237.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-24_18-40-48
  done: false
  episode_len_mean: 239.79
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.3978999999999933
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 643
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06414461284875872
          cur_lr: 5.000000000000001e-05
          entropy: 1.7593380279011197
          entropy_coeff: 0.009999999999999998
          kl: 0.0485515599710912
          policy_loss: -0.0020872016747792562
          total_loss: -0.002774842580159505
          vf_explained_var: 0.1296323835849762
          vf_loss: 0.013791420392226427
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,197,7799.85,197000,-2.3979,-1.99,-4.59,239.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-24_18-41-25
  done: false
  episode_len_mean: 243.87
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4386999999999923
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 646
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09621691927313802
          cur_lr: 5.000000000000001e-05
          entropy: 2.061694331963857
          entropy_coeff: 0.009999999999999998
          kl: 0.026644200765761974
          policy_loss: -0.08439666922721598
          total_loss: -0.09193600018819173
          vf_explained_var: 0.4923647940158844
          vf_loss: 0.010513991065737274
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,198,7836.76,198000,-2.4387,-1.99,-4.59,243.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-24_18-42-03
  done: false
  episode_len_mean: 246.93
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.469299999999991
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 649
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 1.8607396245002747
          entropy_coeff: 0.009999999999999998
          kl: 0.021642667360747995
          policy_loss: -0.004704933613538742
          total_loss: -0.01359988335106108
          vf_explained_var: 0.8196718096733093
          vf_loss: 0.0065888602079616655
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,199,7874.66,199000,-2.4693,-1.99,-4.59,246.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-24_18-42-44
  done: false
  episode_len_mean: 248.59
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.485899999999991
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 652
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 1.2371627456612058
          entropy_coeff: 0.009999999999999998
          kl: 0.01384727709044924
          policy_loss: 0.006019874744945102
          total_loss: 0.005777045918835534
          vf_explained_var: 0.46721649169921875
          vf_loss: 0.009131030914270215
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,200,7915.38,200000,-2.4859,-1.99,-4.59,248.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-24_18-43-21
  done: false
  episode_len_mean: 252.18
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5217999999999905
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 655
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 1.994199260075887
          entropy_coeff: 0.009999999999999998
          kl: 0.02132857775072994
          policy_loss: 0.07816582438018586
          total_loss: 0.06660867043667369
          vf_explained_var: 0.6813824772834778
          vf_loss: 0.0037674581399187446
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,201,7952.83,201000,-2.5218,-1.99,-4.59,252.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-24_18-44-03
  done: false
  episode_len_mean: 255.34
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5533999999999892
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 658
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 1.8142895738283793
          entropy_coeff: 0.009999999999999998
          kl: 0.017517919036769077
          policy_loss: -0.032083529482285184
          total_loss: -0.04013858065009117
          vf_explained_var: 0.8839249610900879
          vf_loss: 0.004399214951424963
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,202,7994.57,202000,-2.5534,-1.99,-4.59,255.34




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-24_18-45-07
  done: false
  episode_len_mean: 257.13
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5712999999999884
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 3
  episodes_total: 661
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 1.4150471422407362
          entropy_coeff: 0.009999999999999998
          kl: 0.014275657391017068
          policy_loss: -0.11053637564182281
          total_loss: -0.11325518124633366
          vf_explained_var: 0.7978836297988892
          vf_loss: 0.00679590100577722
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,203,8058.36,203000,-2.5713,-1.99,-4.59,257.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-24_18-45-48
  done: false
  episode_len_mean: 259.62
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5961999999999885
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 665
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 1.4995561394426558
          entropy_coeff: 0.009999999999999998
          kl: 0.010975837145205721
          policy_loss: -0.0024806079765160877
          total_loss: -0.004690764678849114
          vf_explained_var: 0.500234842300415
          vf_loss: 0.009221196355712082
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,204,8099.39,204000,-2.5962,-1.99,-4.59,259.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-24_18-46-32
  done: false
  episode_len_mean: 260.25
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6024999999999885
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 669
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 0.5593429562118318
          entropy_coeff: 0.009999999999999998
          kl: 0.003921056058200081
          policy_loss: 0.026697491771645016
          total_loss: 0.03333221798141797
          vf_explained_var: 0.2709197700023651
          vf_loss: 0.010954863743649589
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,205,8143.65,205000,-2.6025,-1.99,-4.59,260.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-24_18-47-16
  done: false
  episode_len_mean: 260.81
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6080999999999888
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 673
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 0.42781504690647126
          entropy_coeff: 0.009999999999999998
          kl: 0.003636122360333122
          policy_loss: 0.0247984003689554
          total_loss: 0.03242829367518425
          vf_explained_var: 0.22658522427082062
          vf_loss: 0.011317658931430843
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,206,8187.67,206000,-2.6081,-1.99,-4.59,260.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-24_18-48-02
  done: false
  episode_len_mean: 261.95
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.619499999999988
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 677
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0811830256367102
          cur_lr: 5.000000000000001e-05
          entropy: 0.44097940888669757
          entropy_coeff: 0.009999999999999998
          kl: 0.003066195464099408
          policy_loss: 0.015086771878931258
          total_loss: 0.022254012235336833
          vf_explained_var: 0.22668422758579254
          vf_loss: 0.011328113420555988
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,207,8233.65,207000,-2.6195,-1.99,-4.59,261.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-24_18-48-51
  done: false
  episode_len_mean: 262.19
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6218999999999886
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 5
  episodes_total: 682
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0405915128183551
          cur_lr: 5.000000000000001e-05
          entropy: 0.361453025870853
          entropy_coeff: 0.009999999999999998
          kl: 0.0139982519512216
          policy_loss: 0.0006476779778798421
          total_loss: 0.008694110810756684
          vf_explained_var: 0.44483187794685364
          vf_loss: 0.011092749133240432
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,208,8282.26,208000,-2.6219,-1.99,-4.59,262.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-24_18-49-36
  done: false
  episode_len_mean: 262.41
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6240999999999883
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 686
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0405915128183551
          cur_lr: 5.000000000000001e-05
          entropy: 0.7379716595013937
          entropy_coeff: 0.009999999999999998
          kl: 0.029916801449667928
          policy_loss: 0.0029473221136464014
          total_loss: 0.006970800749129719
          vf_explained_var: 0.3498062491416931
          vf_loss: 0.010188830374843545
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,209,8327.94,209000,-2.6241,-1.99,-4.59,262.41




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-24_18-50-40
  done: false
  episode_len_mean: 262.21
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6220999999999886
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 690
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 0.6921236660745409
          entropy_coeff: 0.009999999999999998
          kl: 0.0073757153104501245
          policy_loss: 0.0248952174352275
          total_loss: 0.02973578671614329
          vf_explained_var: 0.2208491414785385
          vf_loss: 0.011312718886054224
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,210,8391.32,210000,-2.6221,-1.99,-4.59,262.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-24_18-51-28
  done: false
  episode_len_mean: 262.35
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6234999999999875
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 694
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 0.6275438355074988
          entropy_coeff: 0.009999999999999998
          kl: 0.0042113135153543785
          policy_loss: -0.07284615064660709
          total_loss: -0.06662495351499981
          vf_explained_var: 0.15531288087368011
          vf_loss: 0.012240219302475453
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,211,8439.95,211000,-2.6235,-1.99,-4.59,262.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-24_18-52-13
  done: false
  episode_len_mean: 262.62
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6261999999999874
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 5
  episodes_total: 699
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030443634613766334
          cur_lr: 5.000000000000001e-05
          entropy: 0.6095259659820133
          entropy_coeff: 0.009999999999999998
          kl: 0.009226484100639393
          policy_loss: -0.01933471883336703
          total_loss: -0.011892273359828525
          vf_explained_var: 0.19974511861801147
          vf_loss: 0.013256819887707631
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 21200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,212,8484.75,212000,-2.6262,-1.99,-4.59,262.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-24_18-53-00
  done: false
  episode_len_mean: 262.98
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.629799999999987
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 703
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030443634613766334
          cur_lr: 5.000000000000001e-05
          entropy: 0.45987020466062756
          entropy_coeff: 0.009999999999999998
          kl: 0.00369747785016759
          policy_loss: 0.04188826220730941
          total_loss: 0.04826273827089204
          vf_explained_var: 0.11992565542459488
          vf_loss: 0.01086061369213793
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,213,8531.13,213000,-2.6298,-2.02,-4.59,262.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-24_18-53-47
  done: false
  episode_len_mean: 262.95
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6294999999999873
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 707
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015221817306883167
          cur_lr: 5.000000000000001e-05
          entropy: 0.49461933109495376
          entropy_coeff: 0.009999999999999998
          kl: 0.003659737175259023
          policy_loss: -0.03831565396653281
          total_loss: -0.03161435027917226
          vf_explained_var: 0.09566153585910797
          vf_loss: 0.011591786477300857
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,214,8578.54,214000,-2.6295,-2.02,-4.59,262.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-24_18-54-34
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6300999999999872
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 5
  episodes_total: 712
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.5537577463520897
          entropy_coeff: 0.009999999999999998
          kl: 0.008035839133543258
          policy_loss: -0.01731153635515107
          total_loss: -0.00771487048930592
          vf_explained_var: 0.12365838140249252
          vf_loss: 0.015073083796434933
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,215,8624.9,215000,-2.6301,-2.02,-4.59,263.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-24_18-55-20
  done: false
  episode_len_mean: 262.94
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6293999999999875
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 716
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.6038571496804556
          entropy_coeff: 0.009999999999999998
          kl: 0.011653868234807558
          policy_loss: 0.02444302108552721
          total_loss: 0.03076557873023881
          vf_explained_var: 0.0808708518743515
          vf_loss: 0.012272433336410258
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,216,8671.46,216000,-2.6294,-2.02,-4.59,262.94




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-24_18-56-19
  done: false
  episode_len_mean: 263.38
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.633799999999988
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 720
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.7933027360174391
          entropy_coeff: 0.009999999999999998
          kl: 0.006515607573835306
          policy_loss: 0.02272933555973901
          total_loss: 0.02795704851547877
          vf_explained_var: 0.051886893808841705
          vf_loss: 0.013111155035181178
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,217,8729.91,217000,-2.6338,-2.02,-4.59,263.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-24_18-57-05
  done: false
  episode_len_mean: 263.76
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6375999999999875
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 724
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8222617023520999
          entropy_coeff: 0.009999999999999998
          kl: 0.010242258980655883
          policy_loss: 0.009249897797902425
          total_loss: 0.014991339958376355
          vf_explained_var: 0.04993564262986183
          vf_loss: 0.013886103106455671
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,218,8776.74,218000,-2.6376,-2.02,-4.59,263.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-24_18-57-48
  done: false
  episode_len_mean: 263.65
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.636499999999988
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 728
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8177414092752668
          entropy_coeff: 0.009999999999999998
          kl: 0.010305206454260151
          policy_loss: 0.011821772075361676
          total_loss: 0.01803637444972992
          vf_explained_var: 0.06178653985261917
          vf_loss: 0.014313587442868286
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,219,8819.23,219000,-2.6365,-2.02,-4.59,263.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-24_18-58-30
  done: false
  episode_len_mean: 263.8
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6379999999999875
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 732
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8405473642879062
          entropy_coeff: 0.009999999999999998
          kl: 0.005906182177972142
          policy_loss: 0.04537933609551854
          total_loss: 0.047818502204285726
          vf_explained_var: 0.07496745139360428
          vf_loss: 0.01079968556554781
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,220,8861.65,220000,-2.638,-2.02,-4.59,263.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-24_18-59-15
  done: false
  episode_len_mean: 261.81
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.618099999999988
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 736
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8628268029954699
          entropy_coeff: 0.009999999999999998
          kl: 0.00702006648269465
          policy_loss: 5.8650639322068954e-05
          total_loss: 0.006226169731881883
          vf_explained_var: 0.06097683310508728
          vf_loss: 0.014742353061834971
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,221,8906.42,221000,-2.6181,-2.02,-4.59,261.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-24_18-59-59
  done: false
  episode_len_mean: 259.65
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.5964999999999883
  episode_reward_min: -4.589999999999947
  episodes_this_iter: 4
  episodes_total: 740
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.9036828908655379
          entropy_coeff: 0.009999999999999998
          kl: 0.008944585641803669
          policy_loss: -0.03375594019889831
          total_loss: -0.027919563154379528
          vf_explained_var: 0.07943053543567657
          vf_loss: 0.01480512780447801
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,222,8950.11,222000,-2.5965,-2.02,-4.59,259.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-24_19-00-42
  done: false
  episode_len_mean: 255.35
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.553499999999989
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 744
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.9200397067599826
          entropy_coeff: 0.009999999999999998
          kl: 0.01666319024592554
          policy_loss: -0.06789448062578837
          total_loss: -0.06211842993895213
          vf_explained_var: 0.07976105809211731
          vf_loss: 0.014849627224935425
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,223,8992.96,223000,-2.5535,-2.02,-4.16,255.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-24_19-01-27
  done: false
  episode_len_mean: 251.25
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.51249999999999
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 748
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8048645655314127
          entropy_coeff: 0.009999999999999998
          kl: 0.007678017476119218
          policy_loss: -0.02179603469040659
          total_loss: -0.015085606359773213
          vf_explained_var: 0.08758226782083511
          vf_loss: 0.014700638668404684
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,224,9037.78,224000,-2.5125,-2.02,-3.88,251.25




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-24_19-02-28
  done: false
  episode_len_mean: 247.81
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.4780999999999906
  episode_reward_min: -3.45999999999997
  episodes_this_iter: 5
  episodes_total: 753
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.8499856339560614
          entropy_coeff: 0.009999999999999998
          kl: 0.008006742757172742
          policy_loss: -0.029994958721929126
          total_loss: -0.020122864014572566
          vf_explained_var: 0.14897245168685913
          vf_loss: 0.01831100991823607
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,225,9099.18,225000,-2.4781,-2.02,-3.46,247.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-24_19-03-13
  done: false
  episode_len_mean: 244.1
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.440999999999992
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 757
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 0.9328307131926219
          entropy_coeff: 0.009999999999999998
          kl: 0.011957294842794322
          policy_loss: -0.0024223184420002833
          total_loss: 0.0004503281580077277
          vf_explained_var: 0.23138099908828735
          vf_loss: 0.01210994983298911
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,226,9143.71,226000,-2.441,-2.02,-3.32,244.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-24_19-03-57
  done: false
  episode_len_mean: 242.34
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.4233999999999924
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 761
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007610908653441583
          cur_lr: 5.000000000000001e-05
          entropy: 1.084268037478129
          entropy_coeff: 0.009999999999999998
          kl: 0.07592340816520486
          policy_loss: 0.016261983331706788
          total_loss: 0.017602105024788116
          vf_explained_var: 0.45061221718788147
          vf_loss: 0.01160495368272273
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,227,9187.86,227000,-2.4234,-2.02,-3.26,242.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-24_19-04-34
  done: false
  episode_len_mean: 243.98
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.508799999999992
  episode_reward_min: -7.599999999999953
  episodes_this_iter: 3
  episodes_total: 764
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011416362980162381
          cur_lr: 5.000000000000001e-05
          entropy: 1.2971425546540154
          entropy_coeff: 0.009999999999999998
          kl: 0.14471555736852867
          policy_loss: 0.09665261937512291
          total_loss: 0.20903645174370872
          vf_explained_var: 0.4226789176464081
          vf_loss: 0.12370313209378057
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,228,9225.54,228000,-2.5088,-2.02,-7.6,243.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-24_19-05-16
  done: false
  episode_len_mean: 245.48
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.5633999999999912
  episode_reward_min: -7.599999999999953
  episodes_this_iter: 3
  episodes_total: 767
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017124544470243558
          cur_lr: 5.000000000000001e-05
          entropy: 1.3914770894580417
          entropy_coeff: 0.009999999999999998
          kl: 0.04357395710617939
          policy_loss: -0.04226797612177001
          total_loss: -0.0035862241354253557
          vf_explained_var: 0.6267326474189758
          vf_loss: 0.05185033957370454
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,229,9266.77,229000,-2.5634,-2.02,-7.6,245.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-24_19-05-50
  done: false
  episode_len_mean: 248.06
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6087999999999902
  episode_reward_min: -7.599999999999953
  episodes_this_iter: 3
  episodes_total: 770
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025686816705365344
          cur_lr: 5.000000000000001e-05
          entropy: 1.217776936954922
          entropy_coeff: 0.009999999999999998
          kl: 0.08807806167048875
          policy_loss: -0.007456800258821911
          total_loss: 0.00930256595214208
          vf_explained_var: 0.2909618020057678
          vf_loss: 0.026674689828521676
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,230,9301.3,230000,-2.6088,-2.02,-7.6,248.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-24_19-06-30
  done: false
  episode_len_mean: 249.62
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.6243999999999903
  episode_reward_min: -7.599999999999953
  episodes_this_iter: 3
  episodes_total: 773
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038530225058048026
          cur_lr: 5.000000000000001e-05
          entropy: 1.0873932560284933
          entropy_coeff: 0.009999999999999998
          kl: 0.01777777697056642
          policy_loss: -0.0922628038459354
          total_loss: -0.06118991838561164
          vf_explained_var: 0.5045109391212463
          vf_loss: 0.04126183618274
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,231,9341.45,231000,-2.6244,-2.02,-7.6,249.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-24_19-07-06
  done: false
  episode_len_mean: 253.69
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.773199999999989
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 3
  episodes_total: 776
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038530225058048026
          cur_lr: 5.000000000000001e-05
          entropy: 1.4881185094515483
          entropy_coeff: 0.009999999999999998
          kl: 0.024844689013042594
          policy_loss: -0.09022068464093738
          total_loss: 0.012100181645817228
          vf_explained_var: 0.514546275138855
          vf_loss: 0.11624477807846334
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,232,9376.49,232000,-2.7732,-2.02,-9.69,253.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-24_19-07-42
  done: false
  episode_len_mean: 256.2
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.7982999999999896
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 3
  episodes_total: 779
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.057795337587072004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7504903150929345
          entropy_coeff: 0.009999999999999998
          kl: 0.061883834468734696
          policy_loss: -0.06456804598371188
          total_loss: -0.05094008002844122
          vf_explained_var: 0.19160933792591095
          vf_loss: 0.027556270278162428
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,233,9412.64,233000,-2.7983,-2.02,-9.69,256.2




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-24_19-08-35
  done: false
  episode_len_mean: 258.92
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.825499999999989
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 3
  episodes_total: 782
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 1.5534624324904547
          entropy_coeff: 0.009999999999999998
          kl: 0.019075406762197468
          policy_loss: 0.11003368645906449
          total_loss: 0.10589563681019677
          vf_explained_var: 0.3628341555595398
          vf_loss: 0.009742871599478854
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,234,9466.37,234000,-2.8255,-2.02,-9.69,258.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-24_19-09-17
  done: false
  episode_len_mean: 260.8
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.844299999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 3
  episodes_total: 785
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 1.0969549000263215
          entropy_coeff: 0.009999999999999998
          kl: 0.010983043071592154
          policy_loss: -0.12793063401348062
          total_loss: -0.12544476265708607
          vf_explained_var: 0.6183353066444397
          vf_loss: 0.012503268404139413
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,235,9508.13,235000,-2.8443,-2.02,-9.69,260.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-24_19-09-58
  done: false
  episode_len_mean: 261.19
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.848199999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 789
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.8426578210459815
          entropy_coeff: 0.009999999999999998
          kl: 0.01076236942281036
          policy_loss: -0.052768318520651926
          total_loss: -0.04422719172305531
          vf_explained_var: 0.3281283378601074
          vf_loss: 0.0160346822709673
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,236,9548.92,236000,-2.8482,-2.02,-9.69,261.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-24_19-10-38
  done: false
  episode_len_mean: 264.59
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.882199999999987
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 793
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 1.2655557228459253
          entropy_coeff: 0.009999999999999998
          kl: 0.018999296556602326
          policy_loss: -0.07290814742445946
          total_loss: -0.06970468196603986
          vf_explained_var: 0.401680588722229
          vf_loss: 0.01421191201855739
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,237,9588.73,237000,-2.8822,-2.14,-9.69,264.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-24_19-11-22
  done: false
  episode_len_mean: 264.75
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.883799999999987
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 797
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.9704888019296858
          entropy_coeff: 0.009999999999999998
          kl: 0.011593843261051577
          policy_loss: -0.0036838087770673963
          total_loss: -0.0006134175591998631
          vf_explained_var: 0.3832472860813141
          vf_loss: 0.011770176556375292
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 23800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,238,9632.78,238000,-2.8838,-2.14,-9.69,264.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-24_19-12-07
  done: false
  episode_len_mean: 265.34
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.889699999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 801
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.8893053279982672
          entropy_coeff: 0.009999999999999998
          kl: 0.009135116384136823
          policy_loss: -0.003945232555270195
          total_loss: -0.0003843007402287589
          vf_explained_var: 0.28283247351646423
          vf_loss: 0.011662034700728125
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 23900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,239,9677.32,239000,-2.8897,-2.14,-9.69,265.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-24_19-12-49
  done: false
  episode_len_mean: 266.59
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.9021999999999872
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 3
  episodes_total: 804
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.890344453520245
          entropy_coeff: 0.009999999999999998
          kl: 0.008280695099927411
          policy_loss: -0.035613466592298616
          total_loss: -0.03406029314630561
          vf_explained_var: 0.5129096508026123
          vf_loss: 0.009738740619892876
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,240,9719.22,240000,-2.9022,-2.14,-9.69,266.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-24_19-13-35
  done: false
  episode_len_mean: 267.15
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.907799999999986
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 809
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.6285152031315697
          entropy_coeff: 0.009999999999999998
          kl: 0.006589625483052922
          policy_loss: -0.028121342344416513
          total_loss: -0.018042565799421734
          vf_explained_var: 0.21609322726726532
          vf_loss: 0.015792656503617764
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,241,9765.4,241000,-2.9078,-2.14,-9.69,267.15




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-24_19-14-39
  done: false
  episode_len_mean: 267.13
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.9075999999999866
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 813
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.6095351513889101
          entropy_coeff: 0.009999999999999998
          kl: 0.00495769564902149
          policy_loss: 0.008907394938998753
          total_loss: 0.015637441972891488
          vf_explained_var: 0.16582466661930084
          vf_loss: 0.012395597766670916
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,242,9829.18,242000,-2.9076,-2.04,-9.69,267.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-24_19-15-25
  done: false
  episode_len_mean: 267.03
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.9065999999999867
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 817
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04334650319030403
          cur_lr: 5.000000000000001e-05
          entropy: 0.49653719961643217
          entropy_coeff: 0.009999999999999998
          kl: 0.003054901507605163
          policy_loss: 0.021783596070276367
          total_loss: 0.029517184446255364
          vf_explained_var: 0.06883673369884491
          vf_loss: 0.01256653814473086
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,243,9875.91,243000,-2.9066,-2.04,-9.69,267.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-24_19-16-10
  done: false
  episode_len_mean: 266.47
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.9009999999999874
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 822
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021673251595152015
          cur_lr: 5.000000000000001e-05
          entropy: 0.5841684970590804
          entropy_coeff: 0.009999999999999998
          kl: 0.010385785924950536
          policy_loss: -0.026759099711974463
          total_loss: -0.015777800066603554
          vf_explained_var: 0.11199813336133957
          vf_loss: 0.016597890274392235
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 2440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,244,9920.83,244000,-2.901,-2.04,-9.69,266.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-24_19-16-56
  done: false
  episode_len_mean: 265.96
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.895899999999987
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 826
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021673251595152015
          cur_lr: 5.000000000000001e-05
          entropy: 0.6769577880700429
          entropy_coeff: 0.009999999999999998
          kl: 0.006655083452372029
          policy_loss: 0.0023048210475179885
          total_loss: 0.006231645536091592
          vf_explained_var: 0.28085389733314514
          vf_loss: 0.01055216347384784
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,245,9966.22,245000,-2.8959,-2.04,-9.69,265.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-24_19-17-42
  done: false
  episode_len_mean: 265.11
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.887399999999987
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 830
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021673251595152015
          cur_lr: 5.000000000000001e-05
          entropy: 0.6861268324984444
          entropy_coeff: 0.009999999999999998
          kl: 0.1297494279011485
          policy_loss: 0.027682414236995908
          total_loss: 0.031798895034525126
          vf_explained_var: 0.22954262793064117
          vf_loss: 0.008165657088263995
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,246,10012.9,246000,-2.8874,-2.04,-9.69,265.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-24_19-18-29
  done: false
  episode_len_mean: 264.27
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.878999999999987
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 834
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03250987739272802
          cur_lr: 5.000000000000001e-05
          entropy: 0.37084749870830114
          entropy_coeff: 0.009999999999999998
          kl: 0.00815655931563089
          policy_loss: -0.04158536311652925
          total_loss: -0.03479399399624931
          vf_explained_var: 0.25165244936943054
          vf_loss: 0.010234673962824875
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,247,10059.3,247000,-2.879,-2.04,-9.69,264.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-24_19-19-16
  done: false
  episode_len_mean: 263.57
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.871999999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 839
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03250987739272802
          cur_lr: 5.000000000000001e-05
          entropy: 0.3811137010653814
          entropy_coeff: 0.009999999999999998
          kl: 0.0032610661278308977
          policy_loss: -0.008536382681793638
          total_loss: 0.0006918330159452227
          vf_explained_var: 0.21920014917850494
          vf_loss: 0.012933334449513091
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 24800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,248,10106.2,248000,-2.872,-2.04,-9.69,263.57




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-24_19-20-24
  done: false
  episode_len_mean: 262.44
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.8606999999999876
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 843
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01625493869636401
          cur_lr: 5.000000000000001e-05
          entropy: 0.30622895095083447
          entropy_coeff: 0.009999999999999998
          kl: 0.0024557733624114854
          policy_loss: -0.005271128399504555
          total_loss: 0.0024434746967421637
          vf_explained_var: 0.13839125633239746
          vf_loss: 0.010736972724811899
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,249,10173.9,249000,-2.8607,-1.96,-9.69,262.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-24_19-21-11
  done: false
  episode_len_mean: 261.23
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.8485999999999883
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 848
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008127469348182006
          cur_lr: 5.000000000000001e-05
          entropy: 0.28477647718456056
          entropy_coeff: 0.009999999999999998
          kl: 0.0028541732927724046
          policy_loss: -0.003226112574338913
          total_loss: 0.00827929245101081
          vf_explained_var: 0.10820849984884262
          vf_loss: 0.014329972004310952
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 2500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,250,10221,250000,-2.8486,-1.96,-9.69,261.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-24_19-21-57
  done: false
  episode_len_mean: 260.81
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.844399999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 852
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004063734674091003
          cur_lr: 5.000000000000001e-05
          entropy: 0.2691988476448589
          entropy_coeff: 0.009999999999999998
          kl: 0.0040635750092101
          policy_loss: -0.015088371601369647
          total_loss: -0.006246287955178155
          vf_explained_var: 0.11297386139631271
          vf_loss: 0.01151755897121297
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,251,10267.5,251000,-2.8444,-1.96,-9.69,260.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-24_19-22-44
  done: false
  episode_len_mean: 259.94
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.835699999999988
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 857
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020318673370455015
          cur_lr: 5.000000000000001e-05
          entropy: 0.29852845668792727
          entropy_coeff: 0.009999999999999998
          kl: 0.013160094544323582
          policy_loss: -0.014277338526315159
          total_loss: -0.002235530275437567
          vf_explained_var: 0.11381048709154129
          vf_loss: 0.015000351404564249
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,252,10314.1,252000,-2.8357,-1.96,-9.69,259.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-24_19-23-32
  done: false
  episode_len_mean: 258.76
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.8238999999999885
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 861
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020318673370455015
          cur_lr: 5.000000000000001e-05
          entropy: 0.21343368772003385
          entropy_coeff: 0.009999999999999998
          kl: 0.0013887243620253286
          policy_loss: 0.02477064521776305
          total_loss: 0.034235475460688274
          vf_explained_var: 0.05613525211811066
          vf_loss: 0.011596339433971379
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 2530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,253,10362,253000,-2.8239,-1.96,-9.69,258.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-24_19-24-19
  done: false
  episode_len_mean: 253.07
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6583999999999897
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 5
  episodes_total: 866
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010159336685227507
          cur_lr: 5.000000000000001e-05
          entropy: 0.21052759256627823
          entropy_coeff: 0.009999999999999998
          kl: 0.0032250223030402085
          policy_loss: -0.013493896027406057
          total_loss: 0.00042151792181862723
          vf_explained_var: 0.0790146067738533
          vf_loss: 0.016017410138414966
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,254,10408.9,254000,-2.6584,-1.96,-9.69,253.07




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-24_19-25-22
  done: false
  episode_len_mean: 249.24
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6004999999999905
  episode_reward_min: -9.689999999999966
  episodes_this_iter: 4
  episodes_total: 870
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005079668342613754
          cur_lr: 5.000000000000001e-05
          entropy: 0.2557433346907298
          entropy_coeff: 0.009999999999999998
          kl: 0.002842059696232211
          policy_loss: 0.0010984586758746042
          total_loss: 0.010701420406500498
          vf_explained_var: 0.08881694823503494
          vf_loss: 0.012158955437027746
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 2550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,255,10472.7,255000,-2.6005,-1.96,-9.69,249.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-24_19-26-13
  done: false
  episode_len_mean: 244.16
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.490299999999992
  episode_reward_min: -9.219999999999954
  episodes_this_iter: 5
  episodes_total: 875
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.3075644117262628
          entropy_coeff: 0.009999999999999998
          kl: 0.01635175266251289
          policy_loss: -0.016224619415071275
          total_loss: -0.0037875643206967246
          vf_explained_var: 0.08695067465305328
          vf_loss: 0.015508548532509141
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 2560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,256,10522.8,256000,-2.4903,-1.96,-9.22,244.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-24_19-27-02
  done: false
  episode_len_mean: 239.0
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3899999999999926
  episode_reward_min: -4.869999999999941
  episodes_this_iter: 4
  episodes_total: 879
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.22801866233348847
          entropy_coeff: 0.009999999999999998
          kl: 0.013634036998527143
          policy_loss: 0.023693342092964385
          total_loss: 0.03241106818119685
          vf_explained_var: 0.04576752334833145
          vf_loss: 0.010994453836853305
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,257,10572.3,257000,-2.39,-1.96,-4.87,239


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-24_19-27-49
  done: false
  episode_len_mean: 234.36
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3435999999999937
  episode_reward_min: -4.869999999999941
  episodes_this_iter: 5
  episodes_total: 884
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.260013175672955
          entropy_coeff: 0.009999999999999998
          kl: 0.0074034078049401665
          policy_loss: -0.015451353953944312
          total_loss: -0.0013587145341767205
          vf_explained_var: 0.06407253444194794
          vf_loss: 0.01669089292279548
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,258,10618.8,258000,-2.3436,-1.96,-4.87,234.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-24_19-28-36
  done: false
  episode_len_mean: 233.24
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.332399999999994
  episode_reward_min: -4.869999999999941
  episodes_this_iter: 4
  episodes_total: 888
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.19958315342664718
          entropy_coeff: 0.009999999999999998
          kl: 0.007457108303495532
          policy_loss: 0.030897624790668488
          total_loss: 0.0394672359029452
          vf_explained_var: 0.034183189272880554
          vf_loss: 0.01056355035656856
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,259,10666.6,259000,-2.3324,-1.96,-4.87,233.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-24_19-29-24
  done: false
  episode_len_mean: 229.45
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.294499999999995
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 5
  episodes_total: 893
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.2416653005613221
          entropy_coeff: 0.009999999999999998
          kl: 0.014934252112464838
          policy_loss: -0.021056912756628462
          total_loss: -0.00832220506336954
          vf_explained_var: 0.17078442871570587
          vf_loss: 0.015147567074745894
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 2600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,260,10713.7,260000,-2.2945,-1.96,-3.45,229.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-24_19-30-13
  done: false
  episode_len_mean: 228.62
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.286199999999995
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 4
  episodes_total: 897
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002539834171306877
          cur_lr: 5.000000000000001e-05
          entropy: 0.13782814360327192
          entropy_coeff: 0.009999999999999998
          kl: 0.002613032732370524
          policy_loss: 0.025389760981003442
          total_loss: 0.03493973008460469
          vf_explained_var: 0.17457182705402374
          vf_loss: 0.010927584337898427
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 26100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,261,10762.6,261000,-2.2862,-1.96,-3.45,228.62




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-24_19-31-17
  done: false
  episode_len_mean: 227.1
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.270999999999995
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 5
  episodes_total: 902
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00012699170856534384
          cur_lr: 5.000000000000001e-05
          entropy: 0.1056301491955916
          entropy_coeff: 0.009999999999999998
          kl: 0.0009092766799485513
          policy_loss: -0.01195536173052258
          total_loss: 0.0004575376709302266
          vf_explained_var: 0.10091984272003174
          vf_loss: 0.013469087953368823
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,262,10827.4,262000,-2.271,-1.94,-3.45,227.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-24_19-32-07
  done: false
  episode_len_mean: 225.03
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.2502999999999957
  episode_reward_min: -2.5099999999999905
  episodes_this_iter: 4
  episodes_total: 906
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.349585428267192e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.14572602311770122
          entropy_coeff: 0.009999999999999998
          kl: 0.01056427082365552
          policy_loss: -0.01875354117817349
          total_loss: -0.009044563356373046
          vf_explained_var: 0.06868137419223785
          vf_loss: 0.011165567632350657
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,263,10876.6,263000,-2.2503,-1.94,-2.51,225.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-24_19-32-55
  done: false
  episode_len_mean: 224.48
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.244799999999996
  episode_reward_min: -2.4099999999999926
  episodes_this_iter: 5
  episodes_total: 911
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.349585428267192e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.1134965058002207
          entropy_coeff: 0.009999999999999998
          kl: 0.00126967913318323
          policy_loss: -0.0021240323781967162
          total_loss: 0.012240111745066113
          vf_explained_var: 0.03856035694479942
          vf_loss: 0.015499032206005521
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 2640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,264,10925,264000,-2.2448,-1.94,-2.41,224.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-24_19-33-42
  done: false
  episode_len_mean: 224.07
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.240699999999996
  episode_reward_min: -2.4099999999999926
  episodes_this_iter: 4
  episodes_total: 915
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.174792714133596e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.12099302427636252
          entropy_coeff: 0.009999999999999998
          kl: 0.002132398670512704
          policy_loss: -0.03211518252889315
          total_loss: -0.02093238482872645
          vf_explained_var: 0.06757913529872894
          vf_loss: 0.012392667649934689
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 2650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,265,10971.6,265000,-2.2407,-1.94,-2.41,224.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-24_19-34-30
  done: false
  episode_len_mean: 223.58
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.2357999999999962
  episode_reward_min: -2.4099999999999926
  episodes_this_iter: 5
  episodes_total: 920
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.587396357066798e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.15125469557113116
          entropy_coeff: 0.009999999999999998
          kl: 0.003224167833599662
          policy_loss: 0.010500889188713497
          total_loss: 0.024431253597140313
          vf_explained_var: 0.059832267463207245
          vf_loss: 0.015442858626031213
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,266,11020.4,266000,-2.2358,-1.94,-2.41,223.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-24_19-35-20
  done: false
  episode_len_mean: 222.96
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.229599999999996
  episode_reward_min: -2.4099999999999926
  episodes_this_iter: 5
  episodes_total: 925
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.93698178533399e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.14914493809143703
          entropy_coeff: 0.009999999999999998
          kl: 0.0013139075908651105
          policy_loss: -0.0017251199318303002
          total_loss: 0.013149309241109424
          vf_explained_var: 0.038276467472314835
          vf_loss: 0.01636587047121591
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,267,11069.5,267000,-2.2296,-1.94,-2.41,222.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-24_19-36-08
  done: false
  episode_len_mean: 222.51
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.2250999999999963
  episode_reward_min: -2.389999999999993
  episodes_this_iter: 4
  episodes_total: 929
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.968490892666995e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.2069178420636389
          entropy_coeff: 0.009999999999999998
          kl: 0.0007380364746681936
          policy_loss: 0.030315113564332325
          total_loss: 0.041589694304598704
          vf_explained_var: 0.05459923297166824
          vf_loss: 0.01334375324141648
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 26800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,268,11117.5,268000,-2.2251,-1.94,-2.39,222.51




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-24_19-37-13
  done: false
  episode_len_mean: 221.8
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.217999999999997
  episode_reward_min: -2.3299999999999943
  episodes_this_iter: 5
  episodes_total: 934
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9842454463334975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.2723977155155606
          entropy_coeff: 0.009999999999999998
          kl: 0.0068685646003707745
          policy_loss: -0.026774460905128056
          total_loss: -0.012599053192469808
          vf_explained_var: 0.10115284472703934
          vf_loss: 0.016899370671146444
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,269,11182.5,269000,-2.218,-1.93,-2.33,221.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-24_19-38-02
  done: false
  episode_len_mean: 221.73
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2172999999999967
  episode_reward_min: -2.3299999999999943
  episodes_this_iter: 4
  episodes_total: 938
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9842454463334975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3521517899301317
          entropy_coeff: 0.009999999999999998
          kl: 0.02570536116065101
          policy_loss: 0.03641076758503914
          total_loss: 0.04497098997235298
          vf_explained_var: 0.14060752093791962
          vf_loss: 0.012081687587002914
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,270,11231.4,270000,-2.2173,-1.93,-2.33,221.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-24_19-38-48
  done: false
  episode_len_mean: 222.02
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2201999999999966
  episode_reward_min: -2.3299999999999943
  episodes_this_iter: 5
  episodes_total: 943
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.976368169500245e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3720501532157262
          entropy_coeff: 0.009999999999999998
          kl: 0.0074877240087355785
          policy_loss: 0.004489212814304564
          total_loss: 0.01613814193341467
          vf_explained_var: 0.08601854741573334
          vf_loss: 0.015369415552251868
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 2710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,271,11278.2,271000,-2.2202,-1.93,-2.33,222.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-24_19-39-33
  done: false
  episode_len_mean: 222.3
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2229999999999963
  episode_reward_min: -2.3699999999999934
  episodes_this_iter: 4
  episodes_total: 947
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.976368169500245e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5810090065002441
          entropy_coeff: 0.009999999999999998
          kl: 0.009668350391289436
          policy_loss: 0.011935290528668298
          total_loss: 0.019328115673528778
          vf_explained_var: 0.15509921312332153
          vf_loss: 0.013202888311611281
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 27200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,272,11322.3,272000,-2.223,-1.93,-2.37,222.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-24_19-40-10
  done: false
  episode_len_mean: 225.17
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.250899999999996
  episode_reward_min: -4.879999999999942
  episodes_this_iter: 3
  episodes_total: 950
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.976368169500245e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1015467670228747
          entropy_coeff: 0.009999999999999998
          kl: 0.08697173015865758
          policy_loss: -0.026178974989387725
          total_loss: -0.014894276112318038
          vf_explained_var: 0.3472690284252167
          vf_loss: 0.022299906621790596
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,273,11359.7,273000,-2.2509,-1.93,-4.88,225.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-24_19-40-54
  done: false
  episode_len_mean: 226.13
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.260499999999996
  episode_reward_min: -4.879999999999942
  episodes_this_iter: 4
  episodes_total: 954
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.46455225425037e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8642019550005595
          entropy_coeff: 0.009999999999999998
          kl: 0.03560994550135127
          policy_loss: 0.004765431086222331
          total_loss: 0.009064631412426631
          vf_explained_var: 0.2762772738933563
          vf_loss: 0.012941067303634353
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,274,11403.8,274000,-2.2605,-1.93,-4.88,226.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-24_19-41-38
  done: false
  episode_len_mean: 227.01
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2692999999999954
  episode_reward_min: -4.879999999999942
  episodes_this_iter: 4
  episodes_total: 958
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6968283813755536e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6942366864946153
          entropy_coeff: 0.009999999999999998
          kl: 0.005957880148463978
          policy_loss: 0.03823063489463594
          total_loss: 0.04338439487748676
          vf_explained_var: 0.21256908774375916
          vf_loss: 0.012096090314702854
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,275,11447.9,275000,-2.2693,-1.93,-4.88,227.01




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-24_19-42-37
  done: false
  episode_len_mean: 228.84
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.256899999999996
  episode_reward_min: -4.879999999999942
  episodes_this_iter: 4
  episodes_total: 962
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6968283813755536e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9848527762624952
          entropy_coeff: 0.009999999999999998
          kl: 0.0215311810670216
          policy_loss: -0.09379729992813535
          total_loss: -0.07398749565084775
          vf_explained_var: 0.5086864829063416
          vf_loss: 0.029658188267300527
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,276,11507,276000,-2.2569,-0.77,-4.88,228.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-24_19-43-15
  done: false
  episode_len_mean: 231.57
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.303299999999995
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 965
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0045242572063328e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2280998362435236
          entropy_coeff: 0.009999999999999998
          kl: 0.04149306092077314
          policy_loss: 0.014730246199501886
          total_loss: 0.02630832592646281
          vf_explained_var: 0.407381147146225
          vf_loss: 0.023858664733254247
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,277,11544.4,277000,-2.3033,-0.77,-6.33,231.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-24_19-44-00
  done: false
  episode_len_mean: 232.31
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3106999999999944
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 969
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5067863858094995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6536852495537864
          entropy_coeff: 0.009999999999999998
          kl: 0.009628165681583865
          policy_loss: 0.013474383784665003
          total_loss: 0.01912880465388298
          vf_explained_var: 0.21647930145263672
          vf_loss: 0.012191130593419076
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 27800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,278,11589.6,278000,-2.3107,-0.77,-6.33,232.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-24_19-44-45
  done: false
  episode_len_mean: 233.32
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.320799999999995
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 973
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5067863858094995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6488119370407528
          entropy_coeff: 0.009999999999999998
          kl: 0.016395565005881945
          policy_loss: -0.01617110806206862
          total_loss: -0.01025041358338462
          vf_explained_var: 0.17735189199447632
          vf_loss: 0.012408569299926361
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 27900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,279,11634.6,279000,-2.3208,-0.77,-6.33,233.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-24_19-45-29
  done: false
  episode_len_mean: 233.96
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.327199999999994
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 977
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5067863858094995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5259640832742055
          entropy_coeff: 0.009999999999999998
          kl: 0.006815235631313657
          policy_loss: -0.04759118134776751
          total_loss: -0.040006960680087404
          vf_explained_var: 0.11295594274997711
          vf_loss: 0.012843760061595174
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 2800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,280,11678.1,280000,-2.3272,-0.77,-6.33,233.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-24_19-46-14
  done: false
  episode_len_mean: 234.39
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3314999999999944
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 5
  episodes_total: 982
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5067863858094995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.535767885711458
          entropy_coeff: 0.009999999999999998
          kl: 0.003205172445365889
          policy_loss: 0.0010193711353672875
          total_loss: 0.011009486930237875
          vf_explained_var: 0.13078570365905762
          vf_loss: 0.015347751178261307
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 2810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,281,11723.7,281000,-2.3315,-0.77,-6.33,234.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-24_19-47-01
  done: false
  episode_len_mean: 235.19
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3394999999999944
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 986
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.5339319290474975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6361265477206972
          entropy_coeff: 0.009999999999999998
          kl: 0.012874942487420347
          policy_loss: 0.025002066294352213
          total_loss: 0.030843796167108747
          vf_explained_var: 0.1717735230922699
          vf_loss: 0.012202901010298067
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 28200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,282,11770,282000,-2.3395,-0.77,-6.33,235.19




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-24_19-48-03
  done: false
  episode_len_mean: 235.46
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.342199999999994
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 990
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.5339319290474975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5282407555315229
          entropy_coeff: 0.009999999999999998
          kl: 0.004433455440373477
          policy_loss: 0.021305214365323386
          total_loss: 0.026793252759509615
          vf_explained_var: 0.1799878180027008
          vf_loss: 0.010770417832665974
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,283,11832.9,283000,-2.3422,-0.77,-6.33,235.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-24_19-48-51
  done: false
  episode_len_mean: 236.1
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.348599999999994
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 5
  episodes_total: 995
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7669659645237487e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.45599789420763653
          entropy_coeff: 0.009999999999999998
          kl: 0.0066566855420914645
          policy_loss: -0.023529251168171565
          total_loss: -0.01350878088010682
          vf_explained_var: 0.15237580239772797
          vf_loss: 0.014580430814789401
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,284,11880.1,284000,-2.3486,-0.77,-6.33,236.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-24_19-49-38
  done: false
  episode_len_mean: 236.42
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3517999999999937
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 999
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7669659645237487e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.42578860289520687
          entropy_coeff: 0.009999999999999998
          kl: 0.003786960536095465
          policy_loss: 0.01595881200499005
          total_loss: 0.023658345970842574
          vf_explained_var: 0.10355404019355774
          vf_loss: 0.011957406490627263
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 2850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,285,11926.9,285000,-2.3518,-0.77,-6.33,236.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-24_19-50-25
  done: false
  episode_len_mean: 237.03
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3578999999999937
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1003
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8834829822618744e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4815930114852058
          entropy_coeff: 0.009999999999999998
          kl: 0.0379254034269105
          policy_loss: 0.026116355094644758
          total_loss: 0.032014763438039356
          vf_explained_var: 0.11754444241523743
          vf_loss: 0.010714269760582183
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 28600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,286,11974.6,286000,-2.3579,-0.77,-6.33,237.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-24_19-51-09
  done: false
  episode_len_mean: 238.02
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3677999999999932
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1007
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.825224473392811e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.557634879483117
          entropy_coeff: 0.009999999999999998
          kl: 0.017260998901707108
          policy_loss: -0.03024188627799352
          total_loss: -0.023501496182547675
          vf_explained_var: 0.09742842614650726
          vf_loss: 0.01231669137875239
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 28700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,287,12018.3,287000,-2.3678,-0.77,-6.33,238.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-24_19-51-53
  done: false
  episode_len_mean: 239.13
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.378899999999993
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1011
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.825224473392811e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6246814131736755
          entropy_coeff: 0.009999999999999998
          kl: 0.005411189134431036
          policy_loss: -0.0613163024187088
          total_loss: -0.055544797538055316
          vf_explained_var: 0.21418452262878418
          vf_loss: 0.012018303790440162
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 28800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,288,12062.1,288000,-2.3789,-0.77,-6.33,239.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-24_19-52-37
  done: false
  episode_len_mean: 239.89
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.3864999999999936
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1015
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.825224473392811e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5990993824270037
          entropy_coeff: 0.009999999999999998
          kl: 0.00465969640208649
          policy_loss: -0.060322142640749615
          total_loss: -0.05228267568680975
          vf_explained_var: 0.1754726618528366
          vf_loss: 0.01403045231062505
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,289,12106.4,289000,-2.3865,-0.77,-6.33,239.89




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-24_19-53-39
  done: false
  episode_len_mean: 240.65
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.394099999999993
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 5
  episodes_total: 1020
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4126122366964055e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6335816853576236
          entropy_coeff: 0.009999999999999998
          kl: 0.007570678726863594
          policy_loss: 0.0018696926948097018
          total_loss: 0.010784644674923684
          vf_explained_var: 0.20661213994026184
          vf_loss: 0.015250765459818972
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,290,12168.1,290000,-2.3941,-0.77,-6.33,240.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-24_19-54-22
  done: false
  episode_len_mean: 242.09
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.408499999999992
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1024
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4126122366964055e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9498567342758178
          entropy_coeff: 0.009999999999999998
          kl: 0.02714124838875181
          policy_loss: 0.02416615883509318
          total_loss: 0.025193128320905898
          vf_explained_var: 0.1575501412153244
          vf_loss: 0.010525499987933371
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,291,12211.3,291000,-2.4085,-0.77,-6.33,242.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-24_19-54-59
  done: false
  episode_len_mean: 244.28
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.430399999999992
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1027
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4338121626112197
          entropy_coeff: 0.009999999999999998
          kl: 0.014821197907360705
          policy_loss: 0.052821536113818485
          total_loss: 0.04724739119410515
          vf_explained_var: -0.0602298304438591
          vf_loss: 0.00876394905240482
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,292,12248.4,292000,-2.4304,-0.77,-6.33,244.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-24_19-55-36
  done: false
  episode_len_mean: 247.19
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.4594999999999914
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1030
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.5608931236796908
          entropy_coeff: 0.009999999999999998
          kl: 0.014898224111932439
          policy_loss: 0.007659045358498891
          total_loss: 0.0009624015953805711
          vf_explained_var: -0.04460933431982994
          vf_loss: 0.008912259256208522
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,293,12285.4,293000,-2.4595,-0.77,-6.33,247.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-24_19-56-13
  done: false
  episode_len_mean: 250.28
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.490399999999991
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1034
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.480004824532403
          entropy_coeff: 0.009999999999999998
          kl: 0.010201115528772044
          policy_loss: -0.0018893853657775456
          total_loss: -0.002787206487523185
          vf_explained_var: 0.041853759437799454
          vf_loss: 0.013902210019942787
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,294,12322.3,294000,-2.4904,-0.77,-6.33,250.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-24_19-56-50
  done: false
  episode_len_mean: 252.59
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.5134999999999907
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1037
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3941930201318529
          entropy_coeff: 0.009999999999999998
          kl: 0.007770143539524455
          policy_loss: 0.05149573807915052
          total_loss: 0.04805582463741302
          vf_explained_var: -0.14021119475364685
          vf_loss: 0.01050200408434547
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,295,12359,295000,-2.5135,-0.77,-6.33,252.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-24_19-57-24
  done: false
  episode_len_mean: 254.78
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.5353999999999903
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1040
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4131205201148986
          entropy_coeff: 0.009999999999999998
          kl: 0.010022141775382604
          policy_loss: 0.018085084110498428
          total_loss: 0.014734068430132335
          vf_explained_var: -0.0627363845705986
          vf_loss: 0.010780169043250175
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 2960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,296,12393.5,296000,-2.5354,-0.77,-6.33,254.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-24_19-58-01
  done: false
  episode_len_mean: 257.03
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.55789999999999
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1043
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.118918355044608e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3928688923517862
          entropy_coeff: 0.009999999999999998
          kl: 0.020575339250159492
          policy_loss: -0.12029349009195964
          total_loss: -0.11870255851083332
          vf_explained_var: 0.0651683583855629
          vf_loss: 0.015519574977871445
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,297,12430,297000,-2.5579,-0.77,-6.33,257.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-24_19-58-38
  done: false
  episode_len_mean: 259.98
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.5873999999999886
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1047
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1783775325669113e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3539909270074633
          entropy_coeff: 0.009999999999999998
          kl: 0.013602016718149043
          policy_loss: 0.013683935503164928
          total_loss: 0.01581004514462418
          vf_explained_var: 0.053791724145412445
          vf_loss: 0.015665978762424655
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,298,12466.9,298000,-2.5874,-0.77,-6.33,259.98




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-24_19-59-31
  done: false
  episode_len_mean: 259.48
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.583199999999989
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1050
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1783775325669113e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4903048104710048
          entropy_coeff: 0.009999999999999998
          kl: 0.04533283911208035
          policy_loss: 0.03324814836184184
          total_loss: 0.028283039232095082
          vf_explained_var: -0.21541574597358704
          vf_loss: 0.009937793659951745
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 29900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,299,12520.3,299000,-2.5832,-0.77,-6.33,259.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-24_20-00-14
  done: false
  episode_len_mean: 260.22
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.5905999999999887
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1054
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0744149691528744
          entropy_coeff: 0.009999999999999998
          kl: 0.011434755756693388
          policy_loss: 0.005232725789149602
          total_loss: 0.00895994794037607
          vf_explained_var: 0.08584670722484589
          vf_loss: 0.014471323229372501
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 30000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,300,12563,300000,-2.5906,-0.77,-6.33,260.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-24_20-00-55
  done: false
  episode_len_mean: 261.0
  episode_media: {}
  episode_reward_max: -0.7700000000000019
  episode_reward_mean: -2.598399999999989
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 4
  episodes_total: 1058
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0929106864664289
          entropy_coeff: 0.009999999999999998
          kl: 0.008562139432162674
          policy_loss: 0.012534125314818487
          total_loss: 0.015838831828700172
          vf_explained_var: 0.09743226319551468
          vf_loss: 0.01423377687525418
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,301,12604.3,301000,-2.5984,-0.77,-6.33,261


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-24_20-01-36
  done: false
  episode_len_mean: 260.75
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6265999999999883
  episode_reward_min: -6.329999999999952
  episodes_this_iter: 3
  episodes_total: 1061
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0257338451014624
          entropy_coeff: 0.009999999999999998
          kl: 0.01503453194124865
          policy_loss: 0.0004207399156358507
          total_loss: 0.0008147759570015801
          vf_explained_var: 0.03425728902220726
          vf_loss: 0.010651304624560806
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,302,12645.4,302000,-2.6266,-2.04,-6.33,260.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-24_20-02-18
  done: false
  episode_len_mean: 259.73
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.597299999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1065
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1354129221704272
          entropy_coeff: 0.009999999999999998
          kl: 0.011059448459012187
          policy_loss: 0.009052620165877871
          total_loss: 0.012381577905681397
          vf_explained_var: 0.06599447131156921
          vf_loss: 0.014683036081906822
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 30300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,303,12686.8,303000,-2.5973,-2.04,-3.29,259.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-24_20-02-58
  done: false
  episode_len_mean: 261.0
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.609999999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1069
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2025962697135077
          entropy_coeff: 0.009999999999999998
          kl: 0.010485762123641646
          policy_loss: 0.007024645888143116
          total_loss: 0.01008355791370074
          vf_explained_var: 0.06549474596977234
          vf_loss: 0.0150848266047736
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,304,12726.6,304000,-2.61,-2.04,-3.29,261


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-24_20-03-38
  done: false
  episode_len_mean: 261.71
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6170999999999873
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 3
  episodes_total: 1072
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0898908933003744
          entropy_coeff: 0.009999999999999998
          kl: 0.008551736210051895
          policy_loss: -0.09153793040249082
          total_loss: -0.09005888650814692
          vf_explained_var: 0.05776802822947502
          vf_loss: 0.012377912468380398
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 3050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,305,12766.5,305000,-2.6171,-2.04,-3.29,261.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-24_20-04-18
  done: false
  episode_len_mean: 262.94
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.629399999999987
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1076
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.015026451481713
          entropy_coeff: 0.009999999999999998
          kl: 0.015598089548938375
          policy_loss: -0.00020803411801656086
          total_loss: 0.004775485727522108
          vf_explained_var: 0.09284724295139313
          vf_loss: 0.015133710660868221
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,306,12806.5,306000,-2.6294,-2.04,-3.29,262.94




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-24_20-05-17
  done: false
  episode_len_mean: 263.57
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6356999999999875
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1080
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9189925087822808
          entropy_coeff: 0.009999999999999998
          kl: 0.009531222850173687
          policy_loss: 0.014652312464184232
          total_loss: 0.02014471996161673
          vf_explained_var: 0.08461298793554306
          vf_loss: 0.014682293083104822
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 30700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,307,12866.3,307000,-2.6357,-2.04,-3.29,263.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-24_20-06-02
  done: false
  episode_len_mean: 263.95
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.639499999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1084
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.767566298850368e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6485521892706553
          entropy_coeff: 0.009999999999999998
          kl: 0.03565249641579271
          policy_loss: -0.04272159917487039
          total_loss: -0.03600937873125076
          vf_explained_var: 0.12556982040405273
          vf_loss: 0.013197571494513088
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,308,12910.4,308000,-2.6395,-2.04,-3.29,263.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-24_20-06-48
  done: false
  episode_len_mean: 263.51
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6350999999999876
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 1089
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.151349448275552e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4055114279190699
          entropy_coeff: 0.009999999999999998
          kl: 0.004921112502820317
          policy_loss: -0.03239372919003169
          total_loss: -0.021324597961372798
          vf_explained_var: 0.1169755607843399
          vf_loss: 0.015124214626848698
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 3090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,309,12956.9,309000,-2.6351,-2.04,-3.29,263.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-24_20-07-36
  done: false
  episode_len_mean: 263.26
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6325999999999867
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1093
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.575674724137776e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1862171322107315
          entropy_coeff: 0.009999999999999998
          kl: 0.012688265938497227
          policy_loss: 0.030650358895460764
          total_loss: 0.039749121500386134
          vf_explained_var: 0.07679763436317444
          vf_loss: 0.01096089467820194
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 31000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,310,13004.9,310000,-2.6326,-2.04,-3.29,263.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-24_20-08-23
  done: false
  episode_len_mean: 262.85
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6284999999999883
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 1098
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.575674724137776e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.2693797583381335
          entropy_coeff: 0.009999999999999998
          kl: 0.03589869783697605
          policy_loss: -0.021132245080338584
          total_loss: -0.008402192344268164
          vf_explained_var: 0.12619657814502716
          vf_loss: 0.015423722306473387
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,311,13051.6,311000,-2.6285,-2.04,-3.29,262.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-24_20-09-09
  done: false
  episode_len_mean: 262.62
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.6261999999999883
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1102
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3635120862066654e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.26422182851367526
          entropy_coeff: 0.009999999999999998
          kl: 0.02345700521206374
          policy_loss: 0.02681788524819745
          total_loss: 0.034738641170163946
          vf_explained_var: 0.17126870155334473
          vf_loss: 0.010562853649672535
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 3120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,312,13098,312000,-2.6262,-2.04,-3.29,262.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-24_20-09-57
  done: false
  episode_len_mean: 261.63
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.616299999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 1107
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.045268129309996e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.21660566495524514
          entropy_coeff: 0.009999999999999998
          kl: 0.00984556698968583
          policy_loss: -0.020999048981401654
          total_loss: -0.010095956756008996
          vf_explained_var: 0.15519580245018005
          vf_loss: 0.013069077798475822
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,313,13145.9,313000,-2.6163,-2.04,-3.29,261.63




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-24_20-11-04
  done: false
  episode_len_mean: 260.38
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.603799999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1111
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.045268129309996e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.19137555443578297
          entropy_coeff: 0.009999999999999998
          kl: 0.0017893173809996356
          policy_loss: 0.05570845339033339
          total_loss: 0.06263391110632155
          vf_explained_var: 0.10130573064088821
          vf_loss: 0.008839205166118013
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 31400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,314,13212.8,314000,-2.6038,-1.93,-3.29,260.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-24_20-11-51
  done: false
  episode_len_mean: 259.39
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.5938999999999885
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 1116
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.022634064654998e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1679222156604131
          entropy_coeff: 0.009999999999999998
          kl: 0.001942344165051995
          policy_loss: -0.01343676679664188
          total_loss: 0.0015314807494481405
          vf_explained_var: 0.05400701239705086
          vf_loss: 0.0166474641714659
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 31500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,315,13259.3,315000,-2.5939,-1.93,-3.29,259.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-24_20-12-40
  done: false
  episode_len_mean: 258.99
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.589899999999988
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 1120
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.011317032327499e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1321632910105917
          entropy_coeff: 0.009999999999999998
          kl: 0.00344602231763989
          policy_loss: 0.03863863332404031
          total_loss: 0.049359790484110516
          vf_explained_var: 0.10000525414943695
          vf_loss: 0.01204278479433722
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,316,13308.2,316000,-2.5899,-1.93,-3.29,258.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-24_20-13-27
  done: false
  episode_len_mean: 256.68
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.566799999999989
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 1125
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0056585161637495e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.17085983910494382
          entropy_coeff: 0.009999999999999998
          kl: 0.004864164805756103
          policy_loss: -0.017785758814877935
          total_loss: -0.0016179587692022324
          vf_explained_var: 0.07474876195192337
          vf_loss: 0.017876396162642375
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,317,13355.3,317000,-2.5668,-1.93,-3.29,256.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-24_20-14-15
  done: false
  episode_len_mean: 253.59
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.5358999999999896
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1129
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.028292580818748e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3401954430672858
          entropy_coeff: 0.009999999999999998
          kl: 0.16906462629698177
          policy_loss: 0.02014481681916449
          total_loss: 0.0276655714544985
          vf_explained_var: 0.27883419394493103
          vf_loss: 0.01092262463644147
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,318,13403,318000,-2.5359,-1.93,-3.28,253.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-24_20-15-02
  done: false
  episode_len_mean: 250.06
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.5005999999999906
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 5
  episodes_total: 1134
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.542438871228123e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.18163433058394327
          entropy_coeff: 0.009999999999999998
          kl: 0.0033368795846170783
          policy_loss: -0.04158531708849801
          total_loss: -0.03104919075138039
          vf_explained_var: 0.30645906925201416
          vf_loss: 0.012352466376291381
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 31

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,319,13450.3,319000,-2.5006,-1.93,-3.28,250.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-24_20-15-46
  done: false
  episode_len_mean: 247.56
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.475599999999991
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1138
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7712194356140617e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.2366464275452826
          entropy_coeff: 0.009999999999999998
          kl: 0.008068597207141674
          policy_loss: 0.018558419495821
          total_loss: 0.027163518220186235
          vf_explained_var: 0.2688191831111908
          vf_loss: 0.010971562409152587
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,320,13494,320000,-2.4756,-1.93,-3.28,247.56




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-24_20-16-51
  done: false
  episode_len_mean: 244.96
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4495999999999913
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1142
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7712194356140617e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.32615844955046974
          entropy_coeff: 0.009999999999999998
          kl: 0.010060662376801114
          policy_loss: 0.029336037321223155
          total_loss: 0.0372977737751272
          vf_explained_var: 0.23654253780841827
          vf_loss: 0.011223319245295392
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 3210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,321,13559,321000,-2.4496,-1.93,-3.28,244.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-24_20-17-32
  done: false
  episode_len_mean: 243.07
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.430699999999992
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1146
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7712194356140617e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8083203295866649
          entropy_coeff: 0.009999999999999998
          kl: 0.08140521345570527
          policy_loss: 0.0025723542604181504
          total_loss: 0.0058000010748704275
          vf_explained_var: 0.1753665953874588
          vf_loss: 0.011310821243872246
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 3220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,322,13600,322000,-2.4307,-1.93,-3.28,243.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-24_20-18-17
  done: false
  episode_len_mean: 240.35
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4034999999999926
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1150
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.656829153421092e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3975800997681088
          entropy_coeff: 0.009999999999999998
          kl: 0.0030240724714641625
          policy_loss: 0.04940395024087694
          total_loss: 0.054741023812029097
          vf_explained_var: 0.07960943132638931
          vf_loss: 0.009312876707149876
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 3230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,323,13645.8,323000,-2.4035,-1.93,-2.91,240.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-24_20-19-05
  done: false
  episode_len_mean: 238.88
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3887999999999923
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1154
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.828414576710546e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.27425795247157414
          entropy_coeff: 0.009999999999999998
          kl: 0.0038203136703943287
          policy_loss: -0.030544419172737335
          total_loss: -0.020625482582383687
          vf_explained_var: 0.04388105124235153
          vf_loss: 0.012661516676760382
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,324,13693.5,324000,-2.3888,-1.93,-2.91,238.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-24_20-19-52
  done: false
  episode_len_mean: 236.99
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3698999999999932
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 5
  episodes_total: 1159
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.414207288355273e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3606116962101724
          entropy_coeff: 0.009999999999999998
          kl: 0.014071108205100814
          policy_loss: -0.007773536443710327
          total_loss: 0.00477095999651485
          vf_explained_var: 0.07500241696834564
          vf_loss: 0.016150613108442888
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 3250

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,325,13740.5,325000,-2.3699,-1.93,-2.91,236.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-24_20-20-40
  done: false
  episode_len_mean: 235.29
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3528999999999938
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1163
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.414207288355273e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5349509982599152
          entropy_coeff: 0.009999999999999998
          kl: 0.06005055627603673
          policy_loss: 0.019796169921755792
          total_loss: 0.02753906970222791
          vf_explained_var: 0.0391034297645092
          vf_loss: 0.013092402461916208
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,326,13787.8,326000,-2.3529,-1.93,-2.91,235.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-24_20-21-22
  done: false
  episode_len_mean: 234.39
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3438999999999934
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1167
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8045478290981717
          entropy_coeff: 0.009999999999999998
          kl: 0.008481572126358116
          policy_loss: -0.005051353159877989
          total_loss: 0.0006152472148338953
          vf_explained_var: 0.02389458566904068
          vf_loss: 0.013712077515406741
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,327,13830.5,327000,-2.3439,-1.93,-2.91,234.39




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-24_20-22-24
  done: false
  episode_len_mean: 233.33
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.333299999999994
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1171
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.734167222181956
          entropy_coeff: 0.009999999999999998
          kl: 0.007700551523644745
          policy_loss: -0.017936781835224895
          total_loss: -0.011653007194399833
          vf_explained_var: 0.055853504687547684
          vf_loss: 0.013625445040977664
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 32

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,328,13892.3,328000,-2.3333,-1.93,-2.91,233.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-24_20-23-09
  done: false
  episode_len_mean: 232.27
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.322699999999994
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 1175
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7496203038427565
          entropy_coeff: 0.009999999999999998
          kl: 0.01087819586923394
          policy_loss: -0.07262198337250286
          total_loss: -0.06490989012850655
          vf_explained_var: 0.055326588451862335
          vf_loss: 0.015208294480625125
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 3290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,329,13937.2,329000,-2.3227,-1.93,-2.86,232.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-24_20-23-51
  done: false
  episode_len_mean: 231.44
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3143999999999942
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1179
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6833949671851264
          entropy_coeff: 0.009999999999999998
          kl: 0.007022612577265856
          policy_loss: -0.09184487528271146
          total_loss: -0.08370149119032753
          vf_explained_var: 0.0651908740401268
          vf_loss: 0.01497733202866382
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 33000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,330,13979.4,330000,-2.3144,-1.93,-2.85,231.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-24_20-24-36
  done: false
  episode_len_mean: 231.75
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.317499999999994
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 1184
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.736539884408315
          entropy_coeff: 0.009999999999999998
          kl: 0.0075367188160672385
          policy_loss: -0.019209572093354332
          total_loss: -0.010851691166559855
          vf_explained_var: 0.07239369302988052
          vf_loss: 0.01572328055691388
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,331,14024.2,331000,-2.3175,-1.93,-2.85,231.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-24_20-25-19
  done: false
  episode_len_mean: 232.6
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.325999999999994
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 3
  episodes_total: 1187
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9083556161986457
          entropy_coeff: 0.009999999999999998
          kl: 0.008306903899428593
          policy_loss: -0.09157479289505217
          total_loss: -0.0878586956196361
          vf_explained_var: 0.04770165681838989
          vf_loss: 0.012799655842698283
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,332,14067.2,332000,-2.326,-1.93,-2.85,232.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-24_20-26-01
  done: false
  episode_len_mean: 233.58
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3357999999999937
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1191
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7461321042643653
          entropy_coeff: 0.009999999999999998
          kl: 0.005324861257165499
          policy_loss: -0.08382027993599574
          total_loss: -0.07599763580494456
          vf_explained_var: 0.04960717633366585
          vf_loss: 0.015283965557399724
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,333,14109.2,333000,-2.3358,-1.93,-2.85,233.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-24_20-26-45
  done: false
  episode_len_mean: 234.83
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3482999999999934
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1195
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1213109325329094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6850468224949307
          entropy_coeff: 0.009999999999999998
          kl: 0.0027007657156670147
          policy_loss: -0.03027832309405009
          total_loss: -0.024152438011434345
          vf_explained_var: 0.05500483512878418
          vf_loss: 0.012976353977703386
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,334,14152.9,334000,-2.3483,-1.93,-2.85,234.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-24_20-27-27
  done: false
  episode_len_mean: 235.98
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3597999999999932
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1199
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0606554662664547e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6701197134123908
          entropy_coeff: 0.009999999999999998
          kl: 0.003806327445245472
          policy_loss: -0.01666224863794115
          total_loss: -0.010168319278293186
          vf_explained_var: 0.04731340333819389
          vf_loss: 0.01319512613117695
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,335,14195,335000,-2.3598,-1.93,-2.85,235.98




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-24_20-28-30
  done: false
  episode_len_mean: 236.77
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3676999999999935
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 1204
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3032773313322734e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.618458918068144
          entropy_coeff: 0.009999999999999998
          kl: 0.004172461141869693
          policy_loss: -0.006027148332860735
          total_loss: 0.0013659957382414075
          vf_explained_var: 0.10942593961954117
          vf_loss: 0.013577737752348185
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 33

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,336,14258.1,336000,-2.3677,-1.93,-2.85,236.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-24_20-29-15
  done: false
  episode_len_mean: 237.54
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3753999999999933
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1208
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6516386656661367e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5450629035631815
          entropy_coeff: 0.009999999999999998
          kl: 0.0036809364185616446
          policy_loss: 0.015421114034122892
          total_loss: 0.023591143720679813
          vf_explained_var: 0.05297456681728363
          vf_loss: 0.01362066109561258
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,337,14302.8,337000,-2.3754,-1.93,-2.85,237.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-24_20-29-59
  done: false
  episode_len_mean: 238.47
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.384699999999993
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1212
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3258193328330684e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5265984455744426
          entropy_coeff: 0.009999999999999998
          kl: 0.003323006602925223
          policy_loss: 0.01733623734778828
          total_loss: 0.02580344511402978
          vf_explained_var: 0.04771127551794052
          vf_loss: 0.013733192657430967
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,338,14346.7,338000,-2.3847,-1.96,-2.85,238.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-24_20-30-44
  done: false
  episode_len_mean: 239.02
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3901999999999926
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1216
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6534101009368897
          entropy_coeff: 0.009999999999999998
          kl: 0.008102188895957043
          policy_loss: 0.015735790381828943
          total_loss: 0.023414471828275257
          vf_explained_var: 0.04944843053817749
          vf_loss: 0.014212783674399059
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 3390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,339,14392.1,339000,-2.3902,-1.96,-2.85,239.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-24_20-31-28
  done: false
  episode_len_mean: 239.9
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3989999999999925
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1220
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7531989084349738
          entropy_coeff: 0.009999999999999998
          kl: 0.012789759362157543
          policy_loss: 0.007498961769872242
          total_loss: 0.01458752950032552
          vf_explained_var: 0.033293530344963074
          vf_loss: 0.014620557179053624
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 34000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,340,14436,340000,-2.399,-1.96,-2.85,239.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-24_20-32-11
  done: false
  episode_len_mean: 240.99
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4098999999999924
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1224
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8161788688765632
          entropy_coeff: 0.009999999999999998
          kl: 0.009510977947698902
          policy_loss: -0.0016001521713203855
          total_loss: 0.004437357890937063
          vf_explained_var: 0.04259686917066574
          vf_loss: 0.01419929734741648
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,341,14479.2,341000,-2.4099,-1.96,-2.85,240.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-24_20-32-55
  done: false
  episode_len_mean: 242.11
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.421099999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1228
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7616618421342638
          entropy_coeff: 0.009999999999999998
          kl: 0.010384025829449053
          policy_loss: 0.006711841788556841
          total_loss: 0.013268012843198247
          vf_explained_var: 0.052348800003528595
          vf_loss: 0.01417278651562002
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 34200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,342,14523.2,342000,-2.4211,-1.96,-2.85,242.11




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-24_20-34-00
  done: false
  episode_len_mean: 242.64
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4263999999999926
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1232
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7905287120077346
          entropy_coeff: 0.009999999999999998
          kl: 0.011789271666175693
          policy_loss: -0.04053467528687583
          total_loss: -0.034642266233762105
          vf_explained_var: 0.07723500579595566
          vf_loss: 0.013797695831292206
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,343,14587.8,343000,-2.4264,-1.96,-2.85,242.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-24_20-34-44
  done: false
  episode_len_mean: 242.74
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.427399999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1236
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7844680329163869
          entropy_coeff: 0.009999999999999998
          kl: 0.012451952091317824
          policy_loss: -0.056077132125695546
          total_loss: -0.04865535100301107
          vf_explained_var: 0.06205602362751961
          vf_loss: 0.015266459414528475
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 3440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,344,14631.5,344000,-2.4274,-1.96,-2.85,242.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-24_20-35-26
  done: false
  episode_len_mean: 244.1
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.440999999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1240
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6788005338774787
          entropy_coeff: 0.009999999999999998
          kl: 0.014514326554447423
          policy_loss: -0.03400328564974997
          total_loss: -0.02655492317345407
          vf_explained_var: 0.1104879379272461
          vf_loss: 0.014236364244586892
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,345,14674.1,345000,-2.441,-2.08,-2.85,244.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-24_20-36-08
  done: false
  episode_len_mean: 244.29
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.442899999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 1244
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.629096664165342e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8950373318460253
          entropy_coeff: 0.009999999999999998
          kl: 0.032135924929551626
          policy_loss: -0.01820653354128202
          total_loss: -0.013642228643099467
          vf_explained_var: 0.24128004908561707
          vf_loss: 0.013514678583790858
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 3460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,346,14716.1,346000,-2.4429,-2.08,-2.85,244.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-24_20-36-51
  done: false
  episode_len_mean: 244.46
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.444599999999992
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 1248
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.943644996248012e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8231318526797824
          entropy_coeff: 0.009999999999999998
          kl: 0.02369322923637327
          policy_loss: 0.008215723517868255
          total_loss: 0.013344591524865891
          vf_explained_var: 0.17676131427288055
          vf_loss: 0.01336018725608786
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,347,14758.7,347000,-2.4446,-2.08,-2.76,244.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-24_20-37-31
  done: false
  episode_len_mean: 246.0
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4599999999999915
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 1252
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.491546749437202e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0471420327822367
          entropy_coeff: 0.009999999999999998
          kl: 0.03693331818971974
          policy_loss: 0.02184656833608945
          total_loss: 0.023552602612309986
          vf_explained_var: 0.2253112941980362
          vf_loss: 0.012177458881504006
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,348,14799.1,348000,-2.46,-2.08,-2.77,246


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-24_20-38-11
  done: false
  episode_len_mean: 247.77
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.477699999999991
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 3
  episodes_total: 1255
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2373201241558026e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.121073583761851
          entropy_coeff: 0.009999999999999998
          kl: 0.013851391107770974
          policy_loss: 0.011537888563341564
          total_loss: 0.010446830921702914
          vf_explained_var: 0.15350688993930817
          vf_loss: 0.01011967892344627
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 34900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,349,14838.6,349000,-2.4777,-2.08,-3.21,247.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-24_20-38-54
  done: false
  episode_len_mean: 248.91
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4890999999999908
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 4
  episodes_total: 1259
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2373201241558026e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0705264965693155
          entropy_coeff: 0.009999999999999998
          kl: 0.08029824721657029
          policy_loss: -0.012890115297502941
          total_loss: -0.013789405135644807
          vf_explained_var: 0.3121907413005829
          vf_loss: 0.00980597086664703
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,350,14881.4,350000,-2.4891,-2.08,-3.21,248.91




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-24_20-39-49
  done: false
  episode_len_mean: 250.95
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.50949999999999
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1262
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3559801862337036e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.246669598420461
          entropy_coeff: 0.009999999999999998
          kl: 0.026326759982961946
          policy_loss: -0.11865606423881318
          total_loss: -0.12224136822753483
          vf_explained_var: 0.3429650068283081
          vf_loss: 0.008881394197750423
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,351,14936.4,351000,-2.5095,-2.08,-3.26,250.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-24_20-40-25
  done: false
  episode_len_mean: 253.66
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5365999999999898
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1266
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.033970279350556e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1854242748684354
          entropy_coeff: 0.009999999999999998
          kl: 0.018147584585724668
          policy_loss: 0.008117462446292242
          total_loss: 0.006366827256149716
          vf_explained_var: 0.16844293475151062
          vf_loss: 0.0101036062464118
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 35200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,352,14973,352000,-2.5366,-2.08,-3.26,253.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-24_20-41-04
  done: false
  episode_len_mean: 254.76
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5475999999999894
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1269
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.033970279350556e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1644847644699945
          entropy_coeff: 0.009999999999999998
          kl: 0.032620792460143495
          policy_loss: -0.01515158646636539
          total_loss: -0.019152565466033087
          vf_explained_var: 0.027555979788303375
          vf_loss: 0.007643869259562861
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,353,15011.2,353000,-2.5476,-2.08,-3.26,254.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-24_20-41-39
  done: false
  episode_len_mean: 257.09
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.570899999999989
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1272
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550955419025833e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.138226130273607
          entropy_coeff: 0.009999999999999998
          kl: 0.017832765357531363
          policy_loss: -0.10182388226191202
          total_loss: -0.10121545493602753
          vf_explained_var: 0.06722075492143631
          vf_loss: 0.011990684746868081
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 35400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,354,15046.6,354000,-2.5709,-2.08,-3.26,257.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-24_20-42-16
  done: false
  episode_len_mean: 259.4
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5939999999999883
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1276
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550955419025833e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.113712403509352
          entropy_coeff: 0.009999999999999998
          kl: 0.007481424691419392
          policy_loss: 0.008234844853480658
          total_loss: 0.009342238762312466
          vf_explained_var: 0.08456375449895859
          vf_loss: 0.012244517128500674
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 35500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,355,15083.2,355000,-2.594,-2.08,-3.26,259.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-24_20-42-53
  done: false
  episode_len_mean: 261.03
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6102999999999876
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1279
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550955419025833e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1779816852675544
          entropy_coeff: 0.009999999999999998
          kl: 0.006930828939939963
          policy_loss: 0.06022842311196857
          total_loss: 0.05730522043175167
          vf_explained_var: 0.05830284580588341
          vf_loss: 0.008856614875710673
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 35600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,356,15120.2,356000,-2.6103,-2.08,-3.26,261.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-24_20-43-29
  done: false
  episode_len_mean: 262.71
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6270999999999884
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1282
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550955419025833e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2443077736430699
          entropy_coeff: 0.009999999999999998
          kl: 0.007853314196962597
          policy_loss: -0.04705248334341579
          total_loss: -0.04865408134129312
          vf_explained_var: 0.013424118049442768
          vf_loss: 0.010841478671257695
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,357,15156.2,357000,-2.6271,-2.08,-3.26,262.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-24_20-44-06
  done: false
  episode_len_mean: 264.53
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6452999999999878
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 1285
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550955419025833e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0488999485969543
          entropy_coeff: 0.009999999999999998
          kl: 0.021226190394438602
          policy_loss: -0.10115700082646475
          total_loss: -0.09943483803007337
          vf_explained_var: 0.03904774785041809
          vf_loss: 0.012211154106383522
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 3580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,358,15193.6,358000,-2.6453,-2.08,-3.28,264.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-24_20-44-46
  done: false
  episode_len_mean: 265.46
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6545999999999874
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1289
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8798585096995036
          entropy_coeff: 0.009999999999999998
          kl: 0.008571239043797618
          policy_loss: -0.011770723594559563
          total_loss: -0.010325994011428622
          vf_explained_var: 0.14605426788330078
          vf_loss: 0.010243311560609274
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,359,15233.6,359000,-2.6546,-2.08,-3.28,265.46




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-24_20-45-43
  done: false
  episode_len_mean: 265.97
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.659699999999988
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1293
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8917840262254079
          entropy_coeff: 0.009999999999999998
          kl: 0.00729361648369762
          policy_loss: 0.0003752009322245916
          total_loss: 0.006164613821440273
          vf_explained_var: 0.0897531509399414
          vf_loss: 0.014707250851723882
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 36000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,360,15290.6,360000,-2.6597,-2.08,-3.28,265.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-24_20-46-22
  done: false
  episode_len_mean: 267.02
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6701999999999866
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 1296
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9007751166820526
          entropy_coeff: 0.009999999999999998
          kl: 0.005352322425735186
          policy_loss: -0.11054057462347879
          total_loss: -0.10670438143942092
          vf_explained_var: 0.22045668959617615
          vf_loss: 0.012843940707130564
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,361,15329.4,361000,-2.6702,-2.08,-3.28,267.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-24_20-46-59
  done: false
  episode_len_mean: 269.11
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6910999999999863
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1300
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8813527372148302
          entropy_coeff: 0.009999999999999998
          kl: 0.007946472955047707
          policy_loss: 0.01595823648903105
          total_loss: 0.02180294038520919
          vf_explained_var: 0.09412790089845657
          vf_loss: 0.014658233419888549
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 36200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,362,15365.9,362000,-2.6911,-2.19,-3.28,269.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-24_20-47-38
  done: false
  episode_len_mean: 270.19
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.701899999999986
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 1303
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8534259147114224
          entropy_coeff: 0.009999999999999998
          kl: 0.015516218301758993
          policy_loss: -0.10027738139033318
          total_loss: -0.09466022426883379
          vf_explained_var: 0.1500336080789566
          vf_loss: 0.014151415642764833
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 36300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,363,15404.8,363000,-2.7019,-2.19,-3.28,270.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-24_20-48-17
  done: false
  episode_len_mean: 271.95
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.719499999999986
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1307
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1326433128538755e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9383820725811852
          entropy_coeff: 0.009999999999999998
          kl: 0.05502938420887686
          policy_loss: 0.0017888935489786995
          total_loss: 0.005709660591350661
          vf_explained_var: 0.2653781771659851
          vf_loss: 0.013304582900471158
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 36400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,364,15444.2,364000,-2.7195,-2.19,-3.28,271.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-24_20-48-55
  done: false
  episode_len_mean: 273.46
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.734599999999985
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 1310
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7976926545302073
          entropy_coeff: 0.009999999999999998
          kl: 0.005880883751630211
          policy_loss: -0.02046681394179662
          total_loss: -0.018247680366039278
          vf_explained_var: 0.2633565366268158
          vf_loss: 0.01019605870436256
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 36500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,365,15481.8,365000,-2.7346,-2.19,-3.28,273.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-24_20-49-31
  done: false
  episode_len_mean: 275.95
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.759499999999985
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 4
  episodes_total: 1314
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7246062848303053
          entropy_coeff: 0.009999999999999998
          kl: 0.014351992587612095
          policy_loss: -0.008247588823239008
          total_loss: -0.0007933077712853749
          vf_explained_var: 0.15155728161334991
          vf_loss: 0.014700340769357152
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,366,15518.5,366000,-2.7595,-2.19,-3.45,275.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-24_20-50-11
  done: false
  episode_len_mean: 276.97
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7696999999999847
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 3
  episodes_total: 1317
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8245579971207513
          entropy_coeff: 0.009999999999999998
          kl: 0.013963827188416014
          policy_loss: 0.022372910877068837
          total_loss: 0.025368217792775895
          vf_explained_var: 0.019883817061781883
          vf_loss: 0.0112408869408278
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,367,15558.2,367000,-2.7697,-2.19,-3.45,276.97




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-24_20-51-08
  done: false
  episode_len_mean: 278.44
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7843999999999847
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 4
  episodes_total: 1321
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.995846035083135
          entropy_coeff: 0.009999999999999998
          kl: 0.013418855194836413
          policy_loss: 0.013319524625937143
          total_loss: 0.01788061261177063
          vf_explained_var: 0.07005619257688522
          vf_loss: 0.01451954830230938
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 36800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,368,15615,368000,-2.7844,-2.19,-3.45,278.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-24_20-51-49
  done: false
  episode_len_mean: 279.11
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.791099999999984
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 3
  episodes_total: 1324
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1217269751760695
          entropy_coeff: 0.009999999999999998
          kl: 0.007832774159930445
          policy_loss: -0.08770235892799165
          total_loss: -0.0876864817407396
          vf_explained_var: 0.05955321341753006
          vf_loss: 0.011233142077819341
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 3690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,369,15656.5,369000,-2.7911,-2.19,-3.45,279.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-24_20-52-28
  done: false
  episode_len_mean: 280.08
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8007999999999837
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 4
  episodes_total: 1328
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1777611944410535
          entropy_coeff: 0.009999999999999998
          kl: 0.010856503743009218
          policy_loss: 0.020261245634820728
          total_loss: 0.022113600787189273
          vf_explained_var: 0.07712306827306747
          vf_loss: 0.013629965008132988
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,370,15695.1,370000,-2.8008,-2.19,-3.45,280.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-24_20-53-07
  done: false
  episode_len_mean: 281.67
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.816699999999983
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 4
  episodes_total: 1332
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1016532348261938
          entropy_coeff: 0.009999999999999998
          kl: 0.011603652587573204
          policy_loss: 0.025764935049745773
          total_loss: 0.028076492746671042
          vf_explained_var: 0.09228793531656265
          vf_loss: 0.013328089130421479
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,371,15733.9,371000,-2.8167,-2.42,-3.45,281.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-24_20-53-41
  done: false
  episode_len_mean: 283.81
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.838099999999983
  episode_reward_min: -3.4499999999999704
  episodes_this_iter: 3
  episodes_total: 1335
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6989649692808132e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8952479951911503
          entropy_coeff: 0.009999999999999998
          kl: 0.022578459570428085
          policy_loss: 0.03617322279347314
          total_loss: 0.03908955206473668
          vf_explained_var: -0.037011999636888504
          vf_loss: 0.011868809511522867
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,372,15767.6,372000,-2.8381,-2.42,-3.45,283.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-24_20-54-18
  done: false
  episode_len_mean: 285.17
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8516999999999832
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1338
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.138855399025811
          entropy_coeff: 0.009999999999999998
          kl: 0.005662391073464571
          policy_loss: 0.024213472339842054
          total_loss: 0.024286843091249465
          vf_explained_var: 0.06761633604764938
          vf_loss: 0.011461926096429428
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,373,15805.4,373000,-2.8517,-2.42,-3.48,285.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-24_20-54-56
  done: false
  episode_len_mean: 286.66
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.866599999999983
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1341
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.23865587843789
          entropy_coeff: 0.009999999999999998
          kl: 0.012177825493793017
          policy_loss: -0.11457357994384236
          total_loss: -0.11072404616408878
          vf_explained_var: 0.10872643440961838
          vf_loss: 0.01623609023582604
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,374,15843.2,374000,-2.8666,-2.42,-3.48,286.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-24_20-55-34
  done: false
  episode_len_mean: 288.47
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8846999999999823
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1345
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.195998728275299
          entropy_coeff: 0.009999999999999998
          kl: 0.01937602764668769
          policy_loss: 0.02266892306506634
          total_loss: 0.02665745367606481
          vf_explained_var: 0.13336950540542603
          vf_loss: 0.01594851410223378
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,375,15880.5,375000,-2.8847,-2.42,-3.48,288.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-24_20-56-15
  done: false
  episode_len_mean: 289.04
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.890399999999982
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1348
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1650733166270786
          entropy_coeff: 0.009999999999999998
          kl: 0.011879339019596893
          policy_loss: -0.09729039470354715
          total_loss: -0.09622792088323169
          vf_explained_var: 0.29298511147499084
          vf_loss: 0.012713202929848597
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,376,15922,376000,-2.8904,-2.42,-3.48,289.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-24_20-57-13
  done: false
  episode_len_mean: 288.91
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8890999999999827
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1352
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1376292533344692
          entropy_coeff: 0.009999999999999998
          kl: 0.011382991380469873
          policy_loss: -0.023673376027080747
          total_loss: -0.018505189650588564
          vf_explained_var: 0.21796809136867523
          vf_loss: 0.01654447950422764
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 3770

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,377,15980.2,377000,-2.8891,-2.42,-3.48,288.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-24_20-57-54
  done: false
  episode_len_mean: 288.59
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8858999999999826
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1356
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548447453921218e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1576404836442735
          entropy_coeff: 0.009999999999999998
          kl: 0.025971386291570298
          policy_loss: 0.0176632939113511
          total_loss: 0.021499241722954643
          vf_explained_var: 0.23491135239601135
          vf_loss: 0.015412350257651673
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,378,16020.8,378000,-2.8859,-2.42,-3.48,288.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-24_20-58-37
  done: false
  episode_len_mean: 289.32
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8931999999999825
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1359
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8226711808818293e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9992171320650313
          entropy_coeff: 0.009999999999999998
          kl: 0.010588041992629213
          policy_loss: -0.0684770035247008
          total_loss: -0.06470171958208085
          vf_explained_var: 0.17537198960781097
          vf_loss: 0.013767452765670088
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 37900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,379,16063.4,379000,-2.8932,-2.42,-3.48,289.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-24_20-59-17
  done: false
  episode_len_mean: 288.01
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8800999999999823
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1363
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8226711808818293e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9595857348706988
          entropy_coeff: 0.009999999999999998
          kl: 0.011537124467056811
          policy_loss: 0.008245847705337737
          total_loss: 0.013122100631395977
          vf_explained_var: 0.1629808098077774
          vf_loss: 0.014472103574209742
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 38000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,380,16103.8,380000,-2.8801,-2.42,-3.48,288.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-24_20-59-58
  done: false
  episode_len_mean: 287.33
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.873299999999982
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1367
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8226711808818293e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.061099996831682
          entropy_coeff: 0.009999999999999998
          kl: 0.038487112426347087
          policy_loss: 0.022935958041085138
          total_loss: 0.02186265786488851
          vf_explained_var: 0.1652742326259613
          vf_loss: 0.009537686156626377
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,381,16144.4,381000,-2.8733,-2.42,-3.48,287.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-24_21-00-40
  done: false
  episode_len_mean: 286.45
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8644999999999827
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1370
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.734006771322745e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0078988167974683
          entropy_coeff: 0.009999999999999998
          kl: 0.13888657108614588
          policy_loss: -0.1027494731048743
          total_loss: -0.10158653068873617
          vf_explained_var: 0.28834977746009827
          vf_loss: 0.011241852719750669
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,382,16186.4,382000,-2.8645,-2.42,-3.48,286.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-24_21-01-25
  done: false
  episode_len_mean: 284.01
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.8400999999999827
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1374
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.601010156984115e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7489878025319842
          entropy_coeff: 0.009999999999999998
          kl: 0.01409796462159141
          policy_loss: -0.10060397899813123
          total_loss: -0.09466753121879366
          vf_explained_var: 0.31321340799331665
          vf_loss: 0.013426314221902026
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,383,16231.5,383000,-2.8401,-2.39,-3.48,284.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-24_21-02-09
  done: false
  episode_len_mean: 281.92
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.8191999999999835
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1378
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.601010156984115e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8356172071562873
          entropy_coeff: 0.009999999999999998
          kl: 0.018122725958127084
          policy_loss: -0.1052244140042199
          total_loss: -0.09963909457955096
          vf_explained_var: 0.2565593421459198
          vf_loss: 0.013941476225025125
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,384,16275.5,384000,-2.8192,-2.37,-3.48,281.92




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-24_21-03-11
  done: false
  episode_len_mean: 278.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.784799999999984
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 5
  episodes_total: 1383
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.601010156984115e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9580364240540399
          entropy_coeff: 0.009999999999999998
          kl: 0.04910476336294644
          policy_loss: -0.01034717568092876
          total_loss: -0.005487828701734543
          vf_explained_var: 0.17771100997924805
          vf_loss: 0.014439672252370252
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,385,16337.7,385000,-2.7848,-2.24,-3.48,278.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-24_21-03-55
  done: false
  episode_len_mean: 276.99
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.769899999999985
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1387
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9731785727871789
          entropy_coeff: 0.009999999999999998
          kl: 0.00905173535099494
          policy_loss: 0.022928281873464584
          total_loss: 0.02593997319539388
          vf_explained_var: 0.11356176435947418
          vf_loss: 0.012743471511122253
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,386,16381.4,386000,-2.7699,-2.24,-3.48,276.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-24_21-04-38
  done: false
  episode_len_mean: 276.26
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7625999999999853
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1391
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.93396771285269
          entropy_coeff: 0.009999999999999998
          kl: 0.009744595897456382
          policy_loss: 0.03744271116124259
          total_loss: 0.03956201589769787
          vf_explained_var: 0.08766969293355942
          vf_loss: 0.011458972034355005
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,387,16424.6,387000,-2.7626,-2.24,-3.48,276.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-24_21-05-19
  done: false
  episode_len_mean: 275.25
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.752499999999985
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1395
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9330728405051761
          entropy_coeff: 0.009999999999999998
          kl: 0.006456368452756954
          policy_loss: 0.011444530139366785
          total_loss: 0.015047169393963285
          vf_explained_var: 0.07406757771968842
          vf_loss: 0.012933364044874907
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,388,16465.7,388000,-2.7525,-2.24,-3.48,275.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-24_21-06-02
  done: false
  episode_len_mean: 274.3
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.742999999999986
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 1398
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.862154057290819
          entropy_coeff: 0.009999999999999998
          kl: 0.006342398467862469
          policy_loss: -0.09831413047181235
          total_loss: -0.09310437010394203
          vf_explained_var: 0.06854759156703949
          vf_loss: 0.01383129796013236
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,389,16508.7,389000,-2.743,-2.24,-3.48,274.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-24_21-06-47
  done: false
  episode_len_mean: 272.69
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.726899999999985
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1402
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7725193295213911
          entropy_coeff: 0.009999999999999998
          kl: 0.006082502330844294
          policy_loss: -0.10091878705554538
          total_loss: -0.0926982728143533
          vf_explained_var: 0.07116923481225967
          vf_loss: 0.015945705719706084
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,390,16553.4,390000,-2.7269,-2.24,-3.48,272.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-24_21-07-32
  done: false
  episode_len_mean: 270.9
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7089999999999854
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 5
  episodes_total: 1407
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.768207542763816
          entropy_coeff: 0.009999999999999998
          kl: 0.00602460828986469
          policy_loss: -0.013996538147330284
          total_loss: -0.004577797195977635
          vf_explained_var: 0.10394798964262009
          vf_loss: 0.01710081295006805
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,391,16598.1,391000,-2.709,-2.24,-3.48,270.9




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-24_21-08-34
  done: false
  episode_len_mean: 268.7
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.686999999999987
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1411
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7259142345852322
          entropy_coeff: 0.009999999999999998
          kl: 0.01009136880972916
          policy_loss: 0.019439689566691717
          total_loss: 0.0252904050052166
          vf_explained_var: 0.15335208177566528
          vf_loss: 0.013109852611604664
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,392,16660.5,392000,-2.687,-2.1,-3.48,268.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-24_21-09-18
  done: false
  episode_len_mean: 267.04
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.670399999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1415
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2901515235476167e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7576088706652323
          entropy_coeff: 0.009999999999999998
          kl: 0.004423154599258005
          policy_loss: 0.051379086739487115
          total_loss: 0.05422665112548404
          vf_explained_var: 0.15533453226089478
          vf_loss: 0.010423646825883124
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,393,16703.9,393000,-2.6704,-2.1,-3.48,267.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-24_21-10-03
  done: false
  episode_len_mean: 265.41
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6540999999999877
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1419
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.450757617738084e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7186519655916426
          entropy_coeff: 0.009999999999999998
          kl: 0.007883757854675683
          policy_loss: 0.013750941554705302
          total_loss: 0.02045121921433343
          vf_explained_var: 0.1324392557144165
          vf_loss: 0.01388679366144869
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,394,16749.7,394000,-2.6541,-2.1,-3.48,265.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-24_21-10-47
  done: false
  episode_len_mean: 264.33
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6432999999999875
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1423
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.450757617738084e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.671196170647939
          entropy_coeff: 0.009999999999999998
          kl: 0.006668677669869143
          policy_loss: 0.026301618748241
          total_loss: 0.03308109177483453
          vf_explained_var: 0.11427232623100281
          vf_loss: 0.013491431437432765
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,395,16793.5,395000,-2.6433,-2.1,-3.48,264.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-24_21-11-34
  done: false
  episode_len_mean: 263.26
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6325999999999876
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1427
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.450757617738084e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6343485858705309
          entropy_coeff: 0.009999999999999998
          kl: 0.005025496946196818
          policy_loss: 0.04929559098349677
          total_loss: 0.054117249449094136
          vf_explained_var: 0.09008705615997314
          vf_loss: 0.011165139480080041
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,396,16840.3,396000,-2.6326,-2.1,-3.48,263.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-24_21-12-19
  done: false
  episode_len_mean: 261.68
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.616799999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1431
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.450757617738084e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.526760090721978
          entropy_coeff: 0.009999999999999998
          kl: 0.004567995121256748
          policy_loss: 0.03210422578785155
          total_loss: 0.04004263149367438
          vf_explained_var: 0.04499439522624016
          vf_loss: 0.013206002561168538
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,397,16885.4,397000,-2.6168,-2.1,-3.48,261.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-24_21-13-07
  done: false
  episode_len_mean: 258.86
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.588599999999989
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 1435
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.225378808869042e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5952579617500305
          entropy_coeff: 0.009999999999999998
          kl: 0.007001673178740791
          policy_loss: -0.013132438477542665
          total_loss: -0.0035921893186039396
          vf_explained_var: 0.057190291583538055
          vf_loss: 0.015492830021927754
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,398,16933,398000,-2.5886,-2.1,-3.48,258.86




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-24_21-14-10
  done: false
  episode_len_mean: 255.49
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5548999999999893
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 5
  episodes_total: 1440
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.225378808869042e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6618087788422903
          entropy_coeff: 0.009999999999999998
          kl: 0.016500946786711285
          policy_loss: 0.030830799871020848
          total_loss: 0.03975036193927129
          vf_explained_var: 0.1334349811077118
          vf_loss: 0.015537644343243704
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,399,16996.6,399000,-2.5549,-2.07,-3.21,255.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-24_21-14-56
  done: false
  episode_len_mean: 253.34
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5333999999999897
  episode_reward_min: -3.0699999999999785
  episodes_this_iter: 4
  episodes_total: 1444
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.225378808869042e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7684244632720947
          entropy_coeff: 0.009999999999999998
          kl: 0.01881990548775734
          policy_loss: 0.008261456920040978
          total_loss: 0.014800553644696872
          vf_explained_var: 0.1677398830652237
          vf_loss: 0.014223333199818928
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,400,17042.7,400000,-2.5334,-2.07,-3.07,253.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-24_21-15-40
  done: false
  episode_len_mean: 252.65
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5264999999999898
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 3
  episodes_total: 1447
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.225378808869042e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9986608664194743
          entropy_coeff: 0.009999999999999998
          kl: 0.06259100513812209
          policy_loss: -0.09706904052032364
          total_loss: -0.08364139559368293
          vf_explained_var: 0.12289714068174362
          vf_loss: 0.02341423724881477
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,401,17086.2,401000,-2.5265,-2.07,-3.12,252.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-24_21-16-26
  done: false
  episode_len_mean: 251.49
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.51489999999999
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 5
  episodes_total: 1452
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.838068213303563e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6800918089018928
          entropy_coeff: 0.009999999999999998
          kl: 0.008349809002724366
          policy_loss: -0.0075525311960114375
          total_loss: -0.0004094137085808648
          vf_explained_var: 0.15449286997318268
          vf_loss: 0.013944030377186008
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,402,17131.8,402000,-2.5149,-2.07,-3.12,251.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-24_21-17-09
  done: false
  episode_len_mean: 250.14
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5013999999999905
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1456
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.838068213303563e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5965823272864024
          entropy_coeff: 0.009999999999999998
          kl: 0.006847985672676692
          policy_loss: 0.029777164260546368
          total_loss: 0.037048572219080395
          vf_explained_var: 0.1129002496600151
          vf_loss: 0.013237232104357746
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,403,17175.6,403000,-2.5014,-2.07,-3.12,250.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-24_21-17-55
  done: false
  episode_len_mean: 248.74
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4873999999999903
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1460
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.838068213303563e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5923613548278809
          entropy_coeff: 0.009999999999999998
          kl: 0.0022512562425940057
          policy_loss: 0.02564560489522086
          total_loss: 0.03394570748011271
          vf_explained_var: 0.08592036366462708
          vf_loss: 0.01422371334499783
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,404,17220.6,404000,-2.4874,-2.07,-3.12,248.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-24_21-18-41
  done: false
  episode_len_mean: 247.48
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4747999999999912
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1464
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4190341066517815e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5533870001633961
          entropy_coeff: 0.009999999999999998
          kl: 0.003360347579299451
          policy_loss: 0.021733084321022035
          total_loss: 0.03070814535021782
          vf_explained_var: 0.06607069820165634
          vf_loss: 0.01450892761349678
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,405,17266.7,405000,-2.4748,-2.07,-3.12,247.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-24_21-19-27
  done: false
  episode_len_mean: 246.11
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.461099999999991
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1468
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2095170533258907e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5072559767299228
          entropy_coeff: 0.009999999999999998
          kl: 0.0030073411195856025
          policy_loss: -0.013122384084595574
          total_loss: -0.0037312043209870656
          vf_explained_var: 0.06578963249921799
          vf_loss: 0.014463738693545261
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,406,17313.3,406000,-2.4611,-2.07,-3.12,246.11




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-24_21-20-34
  done: false
  episode_len_mean: 244.32
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4431999999999916
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 5
  episodes_total: 1473
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4981321762005488
          entropy_coeff: 0.009999999999999998
          kl: 0.012079819292503144
          policy_loss: -0.019485536052121055
          total_loss: -0.006482398758331935
          vf_explained_var: 0.10474783927202225
          vf_loss: 0.017984455844594373
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,407,17380.2,407000,-2.4432,-2.07,-3.12,244.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-24_21-21-20
  done: false
  episode_len_mean: 243.94
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4393999999999916
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1477
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5278486914104885
          entropy_coeff: 0.009999999999999998
          kl: 0.005392371257866241
          policy_loss: 0.016493286275201375
          total_loss: 0.02443094319767422
          vf_explained_var: 0.13076801598072052
          vf_loss: 0.01321614392929607
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,408,17426,408000,-2.4394,-2.07,-3.12,243.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-24_21-22-06
  done: false
  episode_len_mean: 243.91
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.439099999999992
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1481
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5524451911449433
          entropy_coeff: 0.009999999999999998
          kl: 0.007157767614722053
          policy_loss: 0.0246992949810293
          total_loss: 0.03269439662496249
          vf_explained_var: 0.11587737500667572
          vf_loss: 0.013519552763965395
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,409,17472.2,409000,-2.4391,-2.07,-3.12,243.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-24_21-22-50
  done: false
  episode_len_mean: 243.88
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.438799999999992
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1485
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6423541704813639
          entropy_coeff: 0.009999999999999998
          kl: 0.004129334445809718
          policy_loss: 0.006249169963929388
          total_loss: 0.013697903768883812
          vf_explained_var: 0.07866150885820389
          vf_loss: 0.013872274197638035
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,410,17515.9,410000,-2.4388,-2.07,-3.12,243.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-24_21-23-36
  done: false
  episode_len_mean: 243.42
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.434199999999992
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1489
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.023792633314727e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6343481328752306
          entropy_coeff: 0.009999999999999998
          kl: 0.0038981122458380925
          policy_loss: -0.06029166570968098
          total_loss: -0.052253508236673143
          vf_explained_var: 0.0663580670952797
          vf_loss: 0.014381636596388287
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,411,17562.3,411000,-2.4342,-2.07,-3.12,243.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-24_21-24-22
  done: false
  episode_len_mean: 243.16
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4315999999999924
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1493
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5118963166573634e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6399240056673686
          entropy_coeff: 0.009999999999999998
          kl: 0.008534700716153962
          policy_loss: -0.044794425575269595
          total_loss: -0.03668432997332679
          vf_explained_var: 0.08649353682994843
          vf_loss: 0.014509336495151123
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,412,17608,412000,-2.4316,-2.07,-3.12,243.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-24_21-25-09
  done: false
  episode_len_mean: 242.36
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.423599999999992
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 5
  episodes_total: 1498
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5118963166573634e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6874293386936188
          entropy_coeff: 0.009999999999999998
          kl: 0.015079930915531274
          policy_loss: -0.026781960907909606
          total_loss: -0.016947036816014185
          vf_explained_var: 0.15391336381435394
          vf_loss: 0.016709214490320947
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,413,17654.4,413000,-2.4236,-2.07,-3.12,242.36




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-24_21-26-14
  done: false
  episode_len_mean: 241.45
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4144999999999923
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1502
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5118963166573634e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6536349362797207
          entropy_coeff: 0.009999999999999998
          kl: 0.004583762714978295
          policy_loss: 0.029735791352060108
          total_loss: 0.03598216027021408
          vf_explained_var: 0.15211939811706543
          vf_loss: 0.012782719627850585
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,414,17719.4,414000,-2.4145,-2.05,-3.12,241.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-24_21-27-00
  done: false
  episode_len_mean: 241.16
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4115999999999924
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1506
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.559481583286817e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6242816163433923
          entropy_coeff: 0.009999999999999998
          kl: 0.005759052085408535
          policy_loss: 0.018512108715044127
          total_loss: 0.025168837855259578
          vf_explained_var: 0.17187164723873138
          vf_loss: 0.012899544907526837
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,415,17765.8,415000,-2.4116,-2.05,-3.12,241.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-24_21-27-46
  done: false
  episode_len_mean: 241.0
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4099999999999926
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1510
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.559481583286817e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5126955611838235
          entropy_coeff: 0.009999999999999998
          kl: 0.005162060296219313
          policy_loss: -0.03851890307333734
          total_loss: -0.03022738364007738
          vf_explained_var: 0.13823185861110687
          vf_loss: 0.013418475352227687
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 4160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,416,17811.7,416000,-2.41,-2.05,-3.12,241


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-24_21-28-33
  done: false
  episode_len_mean: 240.37
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4036999999999926
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 5
  episodes_total: 1515
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.559481583286817e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5178212391005622
          entropy_coeff: 0.009999999999999998
          kl: 0.0075722057877181705
          policy_loss: -0.0377687721616692
          total_loss: -0.025513632678323322
          vf_explained_var: 0.11814273148775101
          vf_loss: 0.01743335279946526
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,417,17859,417000,-2.4037,-2.05,-3.12,240.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-24_21-29-18
  done: false
  episode_len_mean: 240.0
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.399999999999993
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1519
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.559481583286817e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7896118885940976
          entropy_coeff: 0.009999999999999998
          kl: 0.04550722363494333
          policy_loss: 0.005942295326126946
          total_loss: 0.010414002504613664
          vf_explained_var: 0.21094195544719696
          vf_loss: 0.012367824040767219
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,418,17903.8,418000,-2.4,-2.05,-3.12,240


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-24_21-30-02
  done: false
  episode_len_mean: 239.83
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.3982999999999923
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 3
  episodes_total: 1522
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8127569496631623
          entropy_coeff: 0.009999999999999998
          kl: 0.0175341734315288
          policy_loss: -0.1285882764392429
          total_loss: -0.12417875933978292
          vf_explained_var: 0.24635690450668335
          vf_loss: 0.012537084085245927
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,419,17947.4,419000,-2.3983,-2.05,-3.12,239.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-24_21-30-49
  done: false
  episode_len_mean: 240.67
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4066999999999923
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 5
  episodes_total: 1527
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.637682671017117
          entropy_coeff: 0.009999999999999998
          kl: 0.005182990338880384
          policy_loss: -0.0031902136902014415
          total_loss: 0.004752889358335071
          vf_explained_var: 0.20349115133285522
          vf_loss: 0.014319929863429733
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,420,17994.7,420000,-2.4067,-2.05,-3.55,240.67




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-24_21-31-57
  done: false
  episode_len_mean: 239.97
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.399699999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1531
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5749255551232232
          entropy_coeff: 0.009999999999999998
          kl: 0.009716021784446187
          policy_loss: 0.030629120022058486
          total_loss: 0.03782503315144115
          vf_explained_var: 0.13547693192958832
          vf_loss: 0.012945167233960496
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 42100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,421,18062.5,421000,-2.3997,-2.05,-3.55,239.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-24_21-32-44
  done: false
  episode_len_mean: 239.79
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.397899999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1535
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5526306238439348
          entropy_coeff: 0.009999999999999998
          kl: 0.005029192991078288
          policy_loss: -0.06713626322646936
          total_loss: -0.057811261754896905
          vf_explained_var: 0.08454915136098862
          vf_loss: 0.014851307165291575
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,422,18109.1,422000,-2.3979,-2.05,-3.55,239.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-24_21-33-28
  done: false
  episode_len_mean: 239.64
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.396399999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 5
  episodes_total: 1540
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5883437342113919
          entropy_coeff: 0.009999999999999998
          kl: 0.02280316241572267
          policy_loss: -0.0114542156457901
          total_loss: -0.0005204679237471686
          vf_explained_var: 0.1512882560491562
          vf_loss: 0.016817182892312606
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 42300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,423,18153.3,423000,-2.3964,-2.05,-3.55,239.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-24_21-34-15
  done: false
  episode_len_mean: 239.39
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.393899999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1544
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7008833562395337e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5103948096434275
          entropy_coeff: 0.009999999999999998
          kl: 0.00407325817660396
          policy_loss: 0.03644231499897109
          total_loss: 0.043602474199401005
          vf_explained_var: 0.15331698954105377
          vf_loss: 0.012264103618346983
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 42400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,424,18200.7,424000,-2.3939,-2.05,-3.55,239.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-24_21-35-01
  done: false
  episode_len_mean: 238.24
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.382399999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1548
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.504416781197669e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.44235305719905427
          entropy_coeff: 0.009999999999999998
          kl: 0.005622666412299182
          policy_loss: -0.0003214810457494524
          total_loss: 0.009184314227766461
          vf_explained_var: 0.09545648097991943
          vf_loss: 0.013929323831366167
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,425,18247,425000,-2.3824,-2.05,-3.55,238.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-24_21-35-49
  done: false
  episode_len_mean: 237.79
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.3778999999999932
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 5
  episodes_total: 1553
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.504416781197669e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5118507772684098
          entropy_coeff: 0.009999999999999998
          kl: 0.009619448992619241
          policy_loss: -0.02346307047539287
          total_loss: -0.014631730483637915
          vf_explained_var: 0.31590554118156433
          vf_loss: 0.013949847707731857
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,426,18294.9,426000,-2.3779,-2.05,-3.55,237.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-24_21-36-36
  done: false
  episode_len_mean: 237.49
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.374899999999993
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1557
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.504416781197669e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6605514221721225
          entropy_coeff: 0.009999999999999998
          kl: 0.00634973678325663
          policy_loss: 0.0032103813770744537
          total_loss: 0.00742081298182408
          vf_explained_var: 0.3873783051967621
          vf_loss: 0.010815945143500963
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,427,18341.6,427000,-2.3749,-2.05,-3.55,237.49




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-24_21-37-42
  done: false
  episode_len_mean: 236.97
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3696999999999933
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 1561
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.504416781197669e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8817846046553718
          entropy_coeff: 0.009999999999999998
          kl: 0.1142876712539512
          policy_loss: 0.034389944622914
          total_loss: 0.033433875400159095
          vf_explained_var: 0.6089191436767578
          vf_loss: 0.007861777338095836
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,428,18407.6,428000,-2.3697,-2.01,-3.55,236.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-24_21-38-16
  done: false
  episode_len_mean: 239.42
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3941999999999926
  episode_reward_min: -3.6399999999999664
  episodes_this_iter: 3
  episodes_total: 1564
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2756625171796505e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1655770950847202
          entropy_coeff: 0.009999999999999998
          kl: 0.05682922505564597
          policy_loss: -0.020072165090176793
          total_loss: -0.023660537434948815
          vf_explained_var: 0.5808230638504028
          vf_loss: 0.008067397680133582
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,429,18441.7,429000,-2.3942,-2.01,-3.64,239.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-24_21-38-48
  done: false
  episode_len_mean: 242.81
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4280999999999917
  episode_reward_min: -3.6399999999999664
  episodes_this_iter: 3
  episodes_total: 1567
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9134937757694756e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1802938765949673
          entropy_coeff: 0.009999999999999998
          kl: 0.0494819682036668
          policy_loss: 0.005361303769879871
          total_loss: 0.003784504739774598
          vf_explained_var: 0.1349414438009262
          vf_loss: 0.010226138515604867
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,430,18473.6,430000,-2.4281,-2.01,-3.64,242.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-24_21-39-20
  done: false
  episode_len_mean: 247.0
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.469999999999991
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 1570
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8702406636542135e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3579500026173061
          entropy_coeff: 0.009999999999999998
          kl: 0.020315968521281667
          policy_loss: 0.05223436984750959
          total_loss: 0.047990986125336754
          vf_explained_var: 0.09304214268922806
          vf_loss: 0.00933611689057822
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,431,18505.8,431000,-2.47,-2.01,-3.74,247


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-24_21-39-47
  done: false
  episode_len_mean: 250.48
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5047999999999906
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 2
  episodes_total: 1572
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.30536099548132e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2337263862291972
          entropy_coeff: 0.009999999999999998
          kl: 0.04368801428894089
          policy_loss: -0.043230526314841373
          total_loss: -0.04825977699624168
          vf_explained_var: -0.16214802861213684
          vf_loss: 0.007308010023552925
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 43200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,432,18532.7,432000,-2.5048,-2.01,-4.23,250.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-24_21-40-24
  done: false
  episode_len_mean: 252.99
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5298999999999894
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1575
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.126142062081231
          entropy_coeff: 0.009999999999999998
          kl: 0.011963673292342176
          policy_loss: -0.09122010328703456
          total_loss: -0.08802428038583862
          vf_explained_var: 0.022445764392614365
          vf_loss: 0.014457242573714918
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 43300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,433,18569.1,433000,-2.5299,-2.01,-4.23,252.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-24_21-41-03
  done: false
  episode_len_mean: 255.25
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.552499999999989
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1579
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.145517365137736
          entropy_coeff: 0.009999999999999998
          kl: 0.012316045331838646
          policy_loss: 0.009463738732867771
          total_loss: 0.012455690569347805
          vf_explained_var: 0.06300736218690872
          vf_loss: 0.014447124923268955
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,434,18607.8,434000,-2.5525,-2.01,-4.23,255.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-24_21-41-42
  done: false
  episode_len_mean: 256.63
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5662999999999885
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1582
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0839936746491325
          entropy_coeff: 0.009999999999999998
          kl: 0.008289639170887694
          policy_loss: -0.09565649686588182
          total_loss: -0.09190398040744993
          vf_explained_var: 0.05440456047654152
          vf_loss: 0.014592452264494366
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 43500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,435,18647.5,435000,-2.5663,-2.01,-4.23,256.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-24_21-42-19
  done: false
  episode_len_mean: 258.71
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.587099999999989
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1586
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.129228871398502
          entropy_coeff: 0.009999999999999998
          kl: 0.013577293193857464
          policy_loss: 0.01698992517259386
          total_loss: 0.020103943596283595
          vf_explained_var: 0.09833589196205139
          vf_loss: 0.014406306317283048
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,436,18684,436000,-2.5871,-2.01,-4.23,258.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-24_21-43-00
  done: false
  episode_len_mean: 260.07
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.600699999999988
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1589
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9794324431154463
          entropy_coeff: 0.009999999999999998
          kl: 0.00695080534713216
          policy_loss: -0.0007292752464612325
          total_loss: -0.0003388460311624739
          vf_explained_var: 0.14634378254413605
          vf_loss: 0.010184754512738436
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,437,18725.2,437000,-2.6007,-2.01,-4.23,260.07




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-24_21-43-59
  done: false
  episode_len_mean: 260.96
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6095999999999884
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1593
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9131339920891656
          entropy_coeff: 0.009999999999999998
          kl: 0.046109693998972606
          policy_loss: 0.004050191616018614
          total_loss: 0.008901129373245768
          vf_explained_var: 0.19382330775260925
          vf_loss: 0.013982278946787118
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 43800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,438,18784,438000,-2.6096,-2.01,-4.23,260.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-24_21-44-42
  done: false
  episode_len_mean: 261.45
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6144999999999885
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1597
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.687062239832968e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6952101164393955
          entropy_coeff: 0.009999999999999998
          kl: 0.011718329190348698
          policy_loss: 0.017018431425094606
          total_loss: 0.02450580137471358
          vf_explained_var: 0.17372797429561615
          vf_loss: 0.014439470786601304
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,439,18827.2,439000,-2.6145,-2.01,-4.23,261.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-24_21-45-24
  done: false
  episode_len_mean: 262.88
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6287999999999876
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1601
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.687062239832968e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.853807137409846
          entropy_coeff: 0.009999999999999998
          kl: 0.02980838900510921
          policy_loss: 0.0010872866544458602
          total_loss: 0.006460420621765985
          vf_explained_var: 0.26049384474754333
          vf_loss: 0.013911199673182434
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,440,18869.3,440000,-2.6288,-2.01,-4.23,262.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-24_21-46-07
  done: false
  episode_len_mean: 263.39
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6338999999999873
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1605
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4530593359749449e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7991268297036489
          entropy_coeff: 0.009999999999999998
          kl: 0.010508596465364662
          policy_loss: 0.001005079514450497
          total_loss: 0.0060324055453141534
          vf_explained_var: 0.28017038106918335
          vf_loss: 0.013018594195859299
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,441,18912.1,441000,-2.6339,-2.01,-4.23,263.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-24_21-46-48
  done: false
  episode_len_mean: 264.25
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.642499999999988
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1608
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4530593359749449e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7216790967517429
          entropy_coeff: 0.009999999999999998
          kl: 0.006868570406418131
          policy_loss: -0.08412294040123622
          total_loss: -0.07871627195013894
          vf_explained_var: 0.22554855048656464
          vf_loss: 0.012623460311442614
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 44200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,442,18953.1,442000,-2.6425,-2.01,-4.23,264.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-24_21-47-34
  done: false
  episode_len_mean: 264.77
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6476999999999866
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 5
  episodes_total: 1613
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4530593359749449e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6645632197459539
          entropy_coeff: 0.009999999999999998
          kl: 0.004657012446931771
          policy_loss: -0.024641122296452524
          total_loss: -0.014616705228885014
          vf_explained_var: 0.22490687668323517
          vf_loss: 0.016670046953691377
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,443,18999.4,443000,-2.6477,-2.01,-4.23,264.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-24_21-48-18
  done: false
  episode_len_mean: 265.35
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6534999999999878
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1616
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.265296679874724e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6535734633604685
          entropy_coeff: 0.009999999999999998
          kl: 0.009054760142349651
          policy_loss: -0.09345796952644984
          total_loss: -0.08712703809142112
          vf_explained_var: 0.2265087068080902
          vf_loss: 0.012866665102127526
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,444,19042.7,444000,-2.6535,-2.01,-4.23,265.35




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-24_21-49-20
  done: false
  episode_len_mean: 265.79
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6578999999999864
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 5
  episodes_total: 1621
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.265296679874724e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6142437696456909
          entropy_coeff: 0.009999999999999998
          kl: 0.007686265743019918
          policy_loss: -0.02205410831504398
          total_loss: -0.01234169602394104
          vf_explained_var: 0.20753860473632812
          vf_loss: 0.015854847017261718
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 44500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,445,19105.3,445000,-2.6579,-2.01,-4.23,265.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-24_21-50-05
  done: false
  episode_len_mean: 264.97
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6496999999999873
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1625
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.265296679874724e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6792079534795549
          entropy_coeff: 0.009999999999999998
          kl: 0.008924256619313712
          policy_loss: 0.001498047841919793
          total_loss: 0.007153644412755966
          vf_explained_var: 0.2183174341917038
          vf_loss: 0.01244767637302478
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,446,19149.5,446000,-2.6497,-2.01,-4.23,264.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-24_21-50-48
  done: false
  episode_len_mean: 265.63
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6562999999999874
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1629
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.265296679874724e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6398919416798485
          entropy_coeff: 0.009999999999999998
          kl: 0.005077688177270861
          policy_loss: -0.006731536653306749
          total_loss: -0.00037703952855534025
          vf_explained_var: 0.1552978754043579
          vf_loss: 0.012753416680627398
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,447,19192.8,447000,-2.6563,-2.01,-4.23,265.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-24_21-51-32
  done: false
  episode_len_mean: 266.47
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6646999999999874
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1633
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.265296679874724e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5970241652594672
          entropy_coeff: 0.009999999999999998
          kl: 0.0025728792475398954
          policy_loss: 0.008228727264536752
          total_loss: 0.015240480999151866
          vf_explained_var: 0.13895586133003235
          vf_loss: 0.012981991914825308
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 4480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,448,19237,448000,-2.6647,-2.01,-4.23,266.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-24_21-52-17
  done: false
  episode_len_mean: 266.76
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.667599999999987
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1637
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.632648339937362e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5371694372759925
          entropy_coeff: 0.009999999999999998
          kl: 0.007452436464501488
          policy_loss: 0.05005644957224528
          total_loss: 0.05514161719216241
          vf_explained_var: 0.11142770200967789
          vf_loss: 0.010456860929520594
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,449,19282.3,449000,-2.6676,-2.01,-4.23,266.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-24_21-53-00
  done: false
  episode_len_mean: 267.15
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.671499999999987
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1641
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.632648339937362e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5459519876374139
          entropy_coeff: 0.009999999999999998
          kl: 0.003853147236354933
          policy_loss: 0.042462261352274155
          total_loss: 0.04897179264161322
          vf_explained_var: 0.05999291315674782
          vf_loss: 0.011969051075478394
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,450,19325.2,450000,-2.6715,-2.01,-4.23,267.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-24_21-53-47
  done: false
  episode_len_mean: 267.31
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.673099999999987
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1645
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.816324169968681e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5088071246941884
          entropy_coeff: 0.009999999999999998
          kl: 0.006549354087542151
          policy_loss: 0.02288392384847005
          total_loss: 0.032321953194008934
          vf_explained_var: 0.059906311333179474
          vf_loss: 0.014526100001401372
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,451,19371.8,451000,-2.6731,-2.01,-4.23,267.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-24_21-54-33
  done: false
  episode_len_mean: 267.57
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6756999999999858
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1649
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.816324169968681e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5508968038691415
          entropy_coeff: 0.009999999999999998
          kl: 0.007924407845799585
          policy_loss: -0.013651649695303705
          total_loss: -0.0044478471494383285
          vf_explained_var: 0.08906987309455872
          vf_loss: 0.014712770086609655
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 45

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,452,19417.4,452000,-2.6757,-2.01,-4.23,267.57




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-24_21-55-38
  done: false
  episode_len_mean: 267.71
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.677099999999987
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 5
  episodes_total: 1654
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.816324169968681e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5674617323610518
          entropy_coeff: 0.009999999999999998
          kl: 0.01658038403014454
          policy_loss: -0.026100250913037193
          total_loss: -0.013595792485607995
          vf_explained_var: 0.16535411775112152
          vf_loss: 0.018179074902501372
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 45300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,453,19483.1,453000,-2.6771,-2.01,-4.23,267.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-24_21-56-17
  done: false
  episode_len_mean: 269.99
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.6998999999999858
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1657
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.816324169968681e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4441025276978812
          entropy_coeff: 0.009999999999999998
          kl: 0.07019371210051703
          policy_loss: 0.09035294892059433
          total_loss: 0.08486724106801881
          vf_explained_var: 0.6146625280380249
          vf_loss: 0.008955317621843682
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,454,19522,454000,-2.6999,-2.01,-4.23,269.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-24_21-56-55
  done: false
  episode_len_mean: 272.06
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7205999999999864
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1660
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.724486254953023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1402216712633768
          entropy_coeff: 0.009999999999999998
          kl: 0.014398397746926427
          policy_loss: -0.0055825793080859715
          total_loss: -0.009239067054457135
          vf_explained_var: 0.445390909910202
          vf_loss: 0.00774572804544328
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 4550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,455,19559.9,455000,-2.7206,-2.14,-4.23,272.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-24_21-57-30
  done: false
  episode_len_mean: 274.05
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.740499999999985
  episode_reward_min: -4.299999999999953
  episodes_this_iter: 3
  episodes_total: 1663
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.724486254953023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4976175414191353
          entropy_coeff: 0.009999999999999998
          kl: 0.04407569201454158
          policy_loss: -0.01950737999545203
          total_loss: -0.026254254082838695
          vf_explained_var: 0.5774088501930237
          vf_loss: 0.008229302204886659
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,456,19594.7,456000,-2.7405,-2.14,-4.3,274.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-24_21-58-00
  done: false
  episode_len_mean: 274.48
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.744799999999985
  episode_reward_min: -5.019999999999937
  episodes_this_iter: 2
  episodes_total: 1665
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.086729382429534e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7224644541740417
          entropy_coeff: 0.009999999999999998
          kl: 0.022913428477221724
          policy_loss: -0.23629706021812227
          total_loss: -0.24900007132026886
          vf_explained_var: 0.08484872430562973
          vf_loss: 0.004521632572545463
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 45700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,457,19625.1,457000,-2.7448,-2.14,-5.02,274.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-24_21-58-34
  done: false
  episode_len_mean: 275.8
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.777799999999985
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 3
  episodes_total: 1668
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.130094073644301e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2976077530119154
          entropy_coeff: 0.009999999999999998
          kl: 0.030705256318515495
          policy_loss: -0.08855215857426325
          total_loss: -0.09251923031277126
          vf_explained_var: 0.43458735942840576
          vf_loss: 0.009009003901802417
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,458,19658.2,458000,-2.7778,-2.14,-7.31,275.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-24_21-59-14
  done: false
  episode_len_mean: 271.75
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7372999999999865
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1672
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.19514111046645e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9802258869012197
          entropy_coeff: 0.009999999999999998
          kl: 0.011310974547061568
          policy_loss: -0.014661045289701885
          total_loss: -0.012968995297948519
          vf_explained_var: 0.4073006808757782
          vf_loss: 0.011494306516316203
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 4590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,459,19698.8,459000,-2.7373,-2.14,-7.31,271.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-24_21-59-53
  done: false
  episode_len_mean: 271.52
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.734799999999986
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 3
  episodes_total: 1675
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.19514111046645e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2257935265700022
          entropy_coeff: 0.009999999999999998
          kl: 0.04343193743617413
          policy_loss: 0.01182120732135243
          total_loss: 0.013395576096243329
          vf_explained_var: 0.36112064123153687
          vf_loss: 0.01383230473018355
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,460,19737.5,460000,-2.7348,-2.14,-7.31,271.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-24_22-00-37
  done: false
  episode_len_mean: 269.63
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.715899999999986
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1679
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3792711665699678e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8651664343145159
          entropy_coeff: 0.009999999999999998
          kl: 0.05553586871196122
          policy_loss: 0.017495433365305266
          total_loss: 0.021015945987568962
          vf_explained_var: 0.25388485193252563
          vf_loss: 0.01217217008686728
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,461,19782,461000,-2.7159,-2.14,-7.31,269.63




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-24_22-01-37
  done: false
  episode_len_mean: 268.65
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7060999999999864
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1683
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0689067498549518e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9444472054640453
          entropy_coeff: 0.009999999999999998
          kl: 0.024723692039880046
          policy_loss: 0.01050148242049747
          total_loss: 0.013986135439740286
          vf_explained_var: 0.17250299453735352
          vf_loss: 0.01292912181880739
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 46200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,462,19841,462000,-2.7061,-2.14,-7.31,268.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-24_22-02-18
  done: false
  episode_len_mean: 267.98
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.699399999999987
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 3
  episodes_total: 1686
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1033601247824274e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9316961805025736
          entropy_coeff: 0.009999999999999998
          kl: 0.012633518648086314
          policy_loss: -0.09308130765954653
          total_loss: -0.08987720244460635
          vf_explained_var: 0.08578908443450928
          vf_loss: 0.012521065150698027
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 4630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,463,19882.4,463000,-2.6994,-2.14,-7.31,267.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-24_22-02-59
  done: false
  episode_len_mean: 268.02
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6997999999999873
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1690
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1033601247824274e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8512002713150448
          entropy_coeff: 0.009999999999999998
          kl: 0.008974883513115373
          policy_loss: 0.011611326038837433
          total_loss: 0.017033887737327152
          vf_explained_var: 0.0820602998137474
          vf_loss: 0.013934565635604992
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 4640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,464,19923.8,464000,-2.6998,-2.14,-7.31,268.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-24_22-03-39
  done: false
  episode_len_mean: 267.91
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6986999999999868
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1694
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1033601247824274e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8715651313463847
          entropy_coeff: 0.009999999999999998
          kl: 0.004836580936508028
          policy_loss: 0.016140964461697473
          total_loss: 0.021378772374656466
          vf_explained_var: 0.10603625327348709
          vf_loss: 0.013953462460388739
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,465,19963.8,465000,-2.6987,-2.14,-7.31,267.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-24_22-04-19
  done: false
  episode_len_mean: 268.71
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7066999999999863
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 3
  episodes_total: 1697
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5516800623912137e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8163502229584588
          entropy_coeff: 0.009999999999999998
          kl: 0.008115764094148088
          policy_loss: -0.08848534284366502
          total_loss: -0.08229384819666545
          vf_explained_var: 0.0836799219250679
          vf_loss: 0.014354996031357183
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 4660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,466,20003.3,466000,-2.7067,-2.14,-7.31,268.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-24_22-04-58
  done: false
  episode_len_mean: 268.91
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.708699999999987
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1701
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5516800623912137e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8163993106948004
          entropy_coeff: 0.009999999999999998
          kl: 0.009262727216880606
          policy_loss: 0.013293175482087666
          total_loss: 0.019379979951514137
          vf_explained_var: 0.12273277342319489
          vf_loss: 0.014250797871500253
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 4670

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,467,20042.5,467000,-2.7087,-2.14,-7.31,268.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-24_22-05-40
  done: false
  episode_len_mean: 270.17
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7596999999999863
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1705
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5516800623912137e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0570376919375526
          entropy_coeff: 0.009999999999999998
          kl: 0.019995864665314875
          policy_loss: -0.16891566353539625
          total_loss: -0.11394107855028576
          vf_explained_var: 0.2072104513645172
          vf_loss: 0.06554496019250816
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 46800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,468,20084.4,468000,-2.7597,-2.14,-7.31,270.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-24_22-06-22
  done: false
  episode_len_mean: 270.1
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7589999999999857
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 3
  episodes_total: 1708
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5516800623912137e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8508842349052429
          entropy_coeff: 0.009999999999999998
          kl: 0.005610009421608532
          policy_loss: -0.016397387906908988
          total_loss: -0.01389931005736192
          vf_explained_var: 0.2067776620388031
          vf_loss: 0.011006920189700193
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 4690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,469,20126.4,469000,-2.759,-2.14,-7.31,270.1




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-24_22-07-21
  done: false
  episode_len_mean: 271.3
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.769899999999986
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1712
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5516800623912137e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.026571242014567
          entropy_coeff: 0.009999999999999998
          kl: 0.053961073725348564
          policy_loss: -0.032855927550958265
          total_loss: 0.03483320304916965
          vf_explained_var: 0.4200752377510071
          vf_loss: 0.07795483898371458
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,470,20185.5,470000,-2.7699,-2.14,-7.31,271.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-24_22-08-04
  done: false
  episode_len_mean: 271.53
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7721999999999865
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1716
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3275200935868203e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.795015299320221
          entropy_coeff: 0.009999999999999998
          kl: 0.00882643009367838
          policy_loss: 0.017212127811378902
          total_loss: 0.023527064008845223
          vf_explained_var: 0.19416311383247375
          vf_loss: 0.014265090641048219
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 47100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,471,20228.4,471000,-2.7722,-2.14,-7.31,271.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-24_22-08-47
  done: false
  episode_len_mean: 272.42
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7810999999999853
  episode_reward_min: -7.309999999999931
  episodes_this_iter: 4
  episodes_total: 1720
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3275200935868203e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8215708977646298
          entropy_coeff: 0.009999999999999998
          kl: 0.0047978740205711174
          policy_loss: -0.011101513769891528
          total_loss: -0.005387468884388606
          vf_explained_var: 0.20473746955394745
          vf_loss: 0.013929755406247244
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,472,20271.1,472000,-2.7811,-2.25,-7.31,272.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-24_22-09-18
  done: false
  episode_len_mean: 274.86
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.864899999999985
  episode_reward_min: -10.609999999999914
  episodes_this_iter: 2
  episodes_total: 1722
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1637600467934101e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3431184477276272
          entropy_coeff: 0.009999999999999998
          kl: 0.07031026008339764
          policy_loss: -0.04892526591817538
          total_loss: 0.07632790232698122
          vf_explained_var: 0.05991252139210701
          vf_loss: 0.13868434789280096
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,473,20301.7,473000,-2.8649,-2.25,-10.61,274.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-24_22-09-44
  done: false
  episode_len_mean: 280.78
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -2.903399999999984
  episode_reward_min: -10.609999999999914
  episodes_this_iter: 2
  episodes_total: 1724
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7456400701901155e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2866217447651758
          entropy_coeff: 0.009999999999999998
          kl: 0.04500996492281979
          policy_loss: -0.1138236110823022
          total_loss: -0.06169414478871557
          vf_explained_var: 0.36314094066619873
          vf_loss: 0.06499567763724674
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,474,20328.6,474000,-2.9034,-1.98,-10.61,280.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-24_22-10-18
  done: false
  episode_len_mean: 284.66
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.136799999999984
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 2
  episodes_total: 1726
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618460105285173e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.388045985168881
          entropy_coeff: 0.009999999999999998
          kl: 0.05577829540639547
          policy_loss: -0.0461611861983935
          total_loss: 0.3382227309462097
          vf_explained_var: 0.3632153272628784
          vf_loss: 0.3982643726385302
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,475,20361.6,475000,-3.1368,-1.98,-19.04,284.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-24_22-10-44
  done: false
  episode_len_mean: 289.92
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.2773999999999823
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 2
  episodes_total: 1728
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.453204992082384
          entropy_coeff: 0.009999999999999998
          kl: 0.0583280051570065
          policy_loss: -0.1421572650472323
          total_loss: 0.030444543063640594
          vf_explained_var: 0.5190562605857849
          vf_loss: 0.18713383931252692
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,476,20387.6,476000,-3.2774,-1.98,-19.04,289.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-24_22-11-19
  done: false
  episode_len_mean: 292.52
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.351899999999981
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1731
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.89153523689164e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0160344474845462
          entropy_coeff: 0.009999999999999998
          kl: 0.008636581418436625
          policy_loss: -0.2469162149561776
          total_loss: -0.2432514794998699
          vf_explained_var: 0.7709664106369019
          vf_loss: 0.01382507776013679
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,477,20422.6,477000,-3.3519,-1.98,-19.04,292.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-24_22-11-55
  done: false
  episode_len_mean: 295.59
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.4612999999999805
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1734
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.89153523689164e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0788799279265933
          entropy_coeff: 0.009999999999999998
          kl: 0.021694322874809026
          policy_loss: -0.0035514263643158807
          total_loss: -0.001555185185538398
          vf_explained_var: 0.5577194690704346
          vf_loss: 0.012785033167650303
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 47

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,478,20458.7,478000,-3.4613,-1.98,-19.04,295.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-24_22-12-32
  done: false
  episode_len_mean: 298.99
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.513299999999981
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1737
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.837302855337459e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3775674925910102
          entropy_coeff: 0.009999999999999998
          kl: 0.07419697424703989
          policy_loss: -0.029550303394595783
          total_loss: 0.0026015135563082167
          vf_explained_var: 0.4392531216144562
          vf_loss: 0.045927429399711804
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 4790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,479,20495.7,479000,-3.5133,-1.98,-19.04,298.99




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-24_22-13-23
  done: false
  episode_len_mean: 301.82
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.54159999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1740
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.6198633233706157
          entropy_coeff: 0.009999999999999998
          kl: 0.017345045282924017
          policy_loss: 0.005554439872503281
          total_loss: -0.0008919384744432237
          vf_explained_var: 0.23266424238681793
          vf_loss: 0.009752237972699933
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,480,20547.2,480000,-3.5416,-1.98,-19.04,301.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-24_22-13-58
  done: false
  episode_len_mean: 305.14
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.5747999999999798
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1743
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.7259292390611436
          entropy_coeff: 0.009999999999999998
          kl: 0.016480838175023313
          policy_loss: 0.04175495124525494
          total_loss: 0.034614228539996676
          vf_explained_var: 0.06942929327487946
          vf_loss: 0.01011855574324727
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 4810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,481,20582.4,481000,-3.5748,-1.98,-19.04,305.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-24_22-14-34
  done: false
  episode_len_mean: 307.7
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.6003999999999787
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1746
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.5414589126904805
          entropy_coeff: 0.009999999999999998
          kl: 0.010304317245995875
          policy_loss: 0.041388546923796336
          total_loss: 0.03536535311076376
          vf_explained_var: -0.03880840912461281
          vf_loss: 0.00939138814703458
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 4820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,482,20617.9,482000,-3.6004,-1.98,-19.04,307.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-24_22-15-09
  done: false
  episode_len_mean: 310.04
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.623799999999978
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1749
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4809041937192282
          entropy_coeff: 0.009999999999999998
          kl: 0.012548772102460583
          policy_loss: 0.018233603404627905
          total_loss: 0.013628196881877051
          vf_explained_var: -0.26441827416419983
          vf_loss: 0.010203622868801984
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,483,20653.1,483000,-3.6238,-1.98,-19.04,310.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-24_22-15-41
  done: false
  episode_len_mean: 313.14
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.6547999999999767
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1752
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4972513914108276
          entropy_coeff: 0.009999999999999998
          kl: 0.013724454904261702
          policy_loss: 0.047179084022839865
          total_loss: 0.04271013148956829
          vf_explained_var: -0.24364593625068665
          vf_loss: 0.010503543507204288
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,484,20685.3,484000,-3.6548,-1.98,-19.04,313.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-24_22-16-18
  done: false
  episode_len_mean: 314.85
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.671899999999976
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1755
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2111250400543212
          entropy_coeff: 0.009999999999999998
          kl: 0.011198719448870006
          policy_loss: -0.11167337248722713
          total_loss: -0.10921411994430753
          vf_explained_var: 0.12662643194198608
          vf_loss: 0.014570489805191756
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,485,20721.9,485000,-3.6719,-1.98,-19.04,314.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-24_22-16-54
  done: false
  episode_len_mean: 313.58
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.6591999999999767
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1759
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1279939399825203
          entropy_coeff: 0.009999999999999998
          kl: 0.00817805058477461
          policy_loss: 0.006644838551680247
          total_loss: 0.009794468101527955
          vf_explained_var: 0.1475098580121994
          vf_loss: 0.01442956469125218
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 48600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,486,20758.1,486000,-3.6592,-1.98,-19.04,313.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-24_22-17-31
  done: false
  episode_len_mean: 313.41
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.657499999999977
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1762
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1270224180486468
          entropy_coeff: 0.009999999999999998
          kl: 0.009007318534085032
          policy_loss: 0.045045334266291724
          total_loss: 0.04415837973356247
          vf_explained_var: 0.04842039570212364
          vf_loss: 0.010383261976918827
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 4870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,487,20794.5,487000,-3.6575,-1.98,-19.04,313.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-24_22-18-09
  done: false
  episode_len_mean: 311.67
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.6400999999999777
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1765
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9065833833482531
          entropy_coeff: 0.009999999999999998
          kl: 0.05047741519009329
          policy_loss: -0.0762007776233885
          total_loss: -0.07143290920390023
          vf_explained_var: 0.051188454031944275
          vf_loss: 0.013833641178078122
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,488,20833,488000,-3.6401,-1.98,-19.04,311.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-24_22-18-54
  done: false
  episode_len_mean: 305.77
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.561299999999979
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1769
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9883931424509284e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6638202958636814
          entropy_coeff: 0.009999999999999998
          kl: 0.005477940069995747
          policy_loss: -0.02276334058907297
          total_loss: -0.01438017338514328
          vf_explained_var: 0.10341661423444748
          vf_loss: 0.015021364349457953
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,489,20878.1,489000,-3.5613,-1.98,-19.04,305.77




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-24_22-19-58
  done: false
  episode_len_mean: 305.95
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.5630999999999786
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1773
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9883931424509284e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7557759775055779
          entropy_coeff: 0.009999999999999998
          kl: 0.015932256965117887
          policy_loss: -0.043378541701369816
          total_loss: -0.03696927693155077
          vf_explained_var: 0.2741236388683319
          vf_loss: 0.013966997112664912
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,490,20941.4,490000,-3.5631,-1.98,-19.04,305.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-24_22-20-44
  done: false
  episode_len_mean: 303.44
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.5381999999999794
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 5
  episodes_total: 1778
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9883931424509284e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5073280877537197
          entropy_coeff: 0.009999999999999998
          kl: 0.002663283528246533
          policy_loss: -0.028855134546756745
          total_loss: -0.018658719294601018
          vf_explained_var: 0.2814665734767914
          vf_loss: 0.015269693245904313
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,491,20987.5,491000,-3.5382,-1.98,-19.04,303.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-24_22-21-30
  done: false
  episode_len_mean: 302.76
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.53139999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1782
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.941965712254642e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6856998148891661
          entropy_coeff: 0.009999999999999998
          kl: 0.07595716860430568
          policy_loss: 0.02659859632452329
          total_loss: 0.0315040303601159
          vf_explained_var: 0.18574848771095276
          vf_loss: 0.011762357668744193
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,492,21033.7,492000,-3.5314,-1.98,-19.04,302.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-24_22-22-10
  done: false
  episode_len_mean: 302.23
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.52609999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1786
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4912948568381965e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8510125014517043
          entropy_coeff: 0.009999999999999998
          kl: 0.007740841033731271
          policy_loss: -0.00387741368677881
          total_loss: -3.674642907248603e-05
          vf_explained_var: 0.16810472309589386
          vf_loss: 0.012350785546004772
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,493,21073.4,493000,-3.5261,-1.98,-19.04,302.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-24_22-22-51
  done: false
  episode_len_mean: 302.13
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.52509999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1789
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4912948568381965e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7710653007030487
          entropy_coeff: 0.009999999999999998
          kl: 0.006388052402367745
          policy_loss: -0.0060033247702651555
          total_loss: -0.0042580878155099025
          vf_explained_var: 0.12447759509086609
          vf_loss: 0.009455882819990317
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,494,21114.5,494000,-3.5251,-1.98,-19.04,302.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-24_22-23-32
  done: false
  episode_len_mean: 301.85
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.5222999999999796
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1793
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4912948568381965e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8217190583546956
          entropy_coeff: 0.009999999999999998
          kl: 0.004804765741757559
          policy_loss: 0.008471972743670146
          total_loss: 0.014058028161525727
          vf_explained_var: 0.09584088623523712
          vf_loss: 0.013803239942838748
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,495,21155.5,495000,-3.5223,-1.98,-19.04,301.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-24_22-24-13
  done: false
  episode_len_mean: 301.3
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.5167999999999804
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1797
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.456474284190982e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.852907829814487
          entropy_coeff: 0.009999999999999998
          kl: 0.009675528778892644
          policy_loss: 0.025497056543827057
          total_loss: 0.03094711394773589
          vf_explained_var: 0.08535438776016235
          vf_loss: 0.013979133694536156
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,496,21196.7,496000,-3.5168,-1.98,-19.04,301.3




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-24_22-25-11
  done: false
  episode_len_mean: 301.26
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.51639999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1801
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.456474284190982e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8402812315358056
          entropy_coeff: 0.009999999999999998
          kl: 0.007162475499477397
          policy_loss: 0.006058215515481101
          total_loss: 0.011439385016759236
          vf_explained_var: 0.118921659886837
          vf_loss: 0.013783982013248736
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,497,21254.5,497000,-3.5164,-1.98,-19.04,301.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-24_22-25-54
  done: false
  episode_len_mean: 301.13
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.515099999999981
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1804
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.456474284190982e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7498800138632457
          entropy_coeff: 0.009999999999999998
          kl: 0.006435230681072489
          policy_loss: -0.05555462150110139
          total_loss: -0.05136024844315317
          vf_explained_var: 0.0989142656326294
          vf_loss: 0.011693172503469719
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 49800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,498,21297.2,498000,-3.5151,-1.98,-19.04,301.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-24_22-26-36
  done: false
  episode_len_mean: 300.2
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.46739999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1808
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.456474284190982e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7783413158522712
          entropy_coeff: 0.009999999999999998
          kl: 0.0030417939080150643
          policy_loss: -0.007199768225351969
          total_loss: -0.001768623623583052
          vf_explained_var: 0.11077703535556793
          vf_loss: 0.013214553706347942
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,499,21339.7,499000,-3.4674,-1.98,-19.04,300.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-24_22-27-18
  done: false
  episode_len_mean: 299.79
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.46439999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1812
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.728237142095491e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8369126710626814
          entropy_coeff: 0.009999999999999998
          kl: 0.005290618736035273
          policy_loss: 0.014019611850380897
          total_loss: 0.019084131634897656
          vf_explained_var: 0.10334355384111404
          vf_loss: 0.013433644537710482
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 50000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,500,21381.5,500000,-3.4644,-1.98,-19.04,299.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-24_22-27-56
  done: false
  episode_len_mean: 300.29
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.4693999999999803
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1816
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.728237142095491e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8188163724210528
          entropy_coeff: 0.009999999999999998
          kl: 0.004091271220957695
          policy_loss: 0.041225253376695845
          total_loss: 0.0443535259200467
          vf_explained_var: 0.09206699579954147
          vf_loss: 0.011316435763405429
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 50100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,501,21419.6,501000,-3.4694,-1.98,-19.04,300.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-24_22-28-39
  done: false
  episode_len_mean: 300.5
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.47149999999998
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 3
  episodes_total: 1819
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8641185710477456e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7231184204419454
          entropy_coeff: 0.009999999999999998
          kl: 0.006183808884275087
          policy_loss: -0.08475004318687651
          total_loss: -0.07822509830196699
          vf_explained_var: 0.06611127406358719
          vf_loss: 0.01375612947675917
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,502,21462,502000,-3.4715,-1.98,-19.04,300.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-24_22-29-19
  done: false
  episode_len_mean: 295.07
  episode_media: {}
  episode_reward_max: -1.9799999999999967
  episode_reward_mean: -3.347899999999982
  episode_reward_min: -19.040000000000017
  episodes_this_iter: 4
  episodes_total: 1823
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8641185710477456e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7645485070016649
          entropy_coeff: 0.009999999999999998
          kl: 0.0024625839244688703
          policy_loss: -0.07553926921553082
          total_loss: -0.0679621802435981
          vf_explained_var: 0.06809604167938232
          vf_loss: 0.015222576622747713
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,503,21502.2,503000,-3.3479,-1.98,-19.04,295.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-24_22-30-01
  done: false
  episode_len_mean: 286.64
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -3.050099999999982
  episode_reward_min: -13.049999999999912
  episodes_this_iter: 4
  episodes_total: 1827
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.320592855238728e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7552439285649194
          entropy_coeff: 0.009999999999999998
          kl: 0.004835566087004552
          policy_loss: 0.011276848117510478
          total_loss: 0.01855530163480176
          vf_explained_var: 0.07567595690488815
          vf_loss: 0.014830894437101153
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 50400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,504,21543.9,504000,-3.0501,-2.18,-13.05,286.64




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-24_22-31-01
  done: false
  episode_len_mean: 281.63
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.912999999999984
  episode_reward_min: -13.049999999999912
  episodes_this_iter: 4
  episodes_total: 1831
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.660296427619364e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.773468996418847
          entropy_coeff: 0.009999999999999998
          kl: 0.01006880015590321
          policy_loss: 0.028040350808037653
          total_loss: 0.03383935367067655
          vf_explained_var: 0.14871767163276672
          vf_loss: 0.013533693510625097
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,505,21603.8,505000,-2.913,-2.18,-13.05,281.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-24_22-31-44
  done: false
  episode_len_mean: 276.58
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.765799999999985
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1835
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.660296427619364e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7839114162656996
          entropy_coeff: 0.009999999999999998
          kl: 0.003992236194040307
          policy_loss: 0.023018074946271047
          total_loss: 0.03007386914557881
          vf_explained_var: 0.1269446462392807
          vf_loss: 0.014894909515149064
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,506,21647.5,506000,-2.7658,-2.18,-3.88,276.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-24_22-32-27
  done: false
  episode_len_mean: 274.2
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.741999999999985
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1839
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6866269184483422
          entropy_coeff: 0.009999999999999998
          kl: 0.005192330376621928
          policy_loss: 0.017629328038957386
          total_loss: 0.025616498788197835
          vf_explained_var: 0.12151864171028137
          vf_loss: 0.014853435030414
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,507,21690.6,507000,-2.742,-2.18,-3.85,274.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-24_22-33-10
  done: false
  episode_len_mean: 270.89
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.708899999999986
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1843
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8146846267912123
          entropy_coeff: 0.009999999999999998
          kl: 0.009789319587683407
          policy_loss: 0.008626732726891835
          total_loss: 0.014859476188818613
          vf_explained_var: 0.15890280902385712
          vf_loss: 0.01437958889744348
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,508,21733.1,508000,-2.7089,-2.18,-3.85,270.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-24_22-33-53
  done: false
  episode_len_mean: 268.81
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.688099999999986
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1846
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7477116829819149
          entropy_coeff: 0.009999999999999998
          kl: 0.00947515085949596
          policy_loss: -0.10604382670587964
          total_loss: -0.09935425685511695
          vf_explained_var: 0.20121954381465912
          vf_loss: 0.014166687128858433
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,509,21776,509000,-2.6881,-2.18,-3.85,268.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-24_22-34-37
  done: false
  episode_len_mean: 266.5
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6649999999999863
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1850
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7549640754858653
          entropy_coeff: 0.009999999999999998
          kl: 0.014016119178542239
          policy_loss: -0.0825893383887079
          total_loss: -0.07698423572712475
          vf_explained_var: 0.252398818731308
          vf_loss: 0.013154743999863664
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,510,21820,510000,-2.665,-2.18,-3.85,266.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-24_22-35-22
  done: false
  episode_len_mean: 262.6
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.625999999999988
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 5
  episodes_total: 1855
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6112177285883161
          entropy_coeff: 0.009999999999999998
          kl: 0.005403607976137595
          policy_loss: -0.030146465947230656
          total_loss: -0.018902812980943255
          vf_explained_var: 0.23299695551395416
          vf_loss: 0.017355833254340623
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 5110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,511,21864.8,511000,-2.626,-2.18,-3.52,262.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-24_22-36-05
  done: false
  episode_len_mean: 261.44
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6143999999999887
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 1858
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6999735587173038
          entropy_coeff: 0.009999999999999998
          kl: 0.007459618033705103
          policy_loss: -0.03724504419498974
          total_loss: -0.034442436860667336
          vf_explained_var: 0.3723863363265991
          vf_loss: 0.009802344017144707
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 5120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,512,21908.4,512000,-2.6144,-2.18,-3.52,261.44




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-24_22-37-10
  done: false
  episode_len_mean: 257.46
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.574599999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1863
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330148213809682e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.53492318491141
          entropy_coeff: 0.009999999999999998
          kl: 0.0029961901786792725
          policy_loss: -0.0023445853756533727
          total_loss: 0.008024887823396259
          vf_explained_var: 0.269225150346756
          vf_loss: 0.015718706686877543
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 51300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,513,21972.6,513000,-2.5746,-2.08,-3.13,257.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-24_22-37-57
  done: false
  episode_len_mean: 255.64
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5563999999999893
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1867
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.165074106904841e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4589493191904492
          entropy_coeff: 0.009999999999999998
          kl: 0.016826092988923038
          policy_loss: 0.04856819899545776
          total_loss: 0.05563576345642408
          vf_explained_var: 0.1504991054534912
          vf_loss: 0.011657055881288316
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,514,22020.1,514000,-2.5564,-2.08,-3.13,255.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-24_22-38-44
  done: false
  episode_len_mean: 255.85
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.558499999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1871
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.165074106904841e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.46944863862461517
          entropy_coeff: 0.009999999999999998
          kl: 0.02017748351435892
          policy_loss: -0.0008434106906255087
          total_loss: 0.006880637009938558
          vf_explained_var: 0.13976243138313293
          vf_loss: 0.012418532495697339
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,515,22067,515000,-2.5585,-2.08,-3.13,255.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-24_22-39-27
  done: false
  episode_len_mean: 255.33
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5532999999999895
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1875
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.747611160357262e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4462183074818717
          entropy_coeff: 0.009999999999999998
          kl: 0.0077186575324825904
          policy_loss: -0.02243059327205022
          total_loss: -0.014036920832263098
          vf_explained_var: 0.11031836271286011
          vf_loss: 0.012855856410331197
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,516,22109.6,516000,-2.5533,-2.08,-3.13,255.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-24_22-40-13
  done: false
  episode_len_mean: 255.5
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5549999999999895
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1879
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.747611160357262e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.43497025701734754
          entropy_coeff: 0.009999999999999998
          kl: 0.006136189623769421
          policy_loss: -0.11694876733753416
          total_loss: -0.10536910651458634
          vf_explained_var: 0.14375700056552887
          vf_loss: 0.015929363864577478
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,517,22155.6,517000,-2.555,-2.08,-3.13,255.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-24_22-40-58
  done: false
  episode_len_mean: 255.07
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5506999999999893
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1884
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.747611160357262e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5648176630338033
          entropy_coeff: 0.009999999999999998
          kl: 0.0330382052154587
          policy_loss: -0.005561109715037876
          total_loss: 0.0029334656894207
          vf_explained_var: 0.1377914696931839
          vf_loss: 0.014142755853633086
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,518,22200.8,518000,-2.5507,-2.08,-3.13,255.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-24_22-41-45
  done: false
  episode_len_mean: 253.58
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5357999999999894
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1888
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.621416740535892e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.27136245038774276
          entropy_coeff: 0.009999999999999998
          kl: 0.002250844689766988
          policy_loss: 0.03249654629164272
          total_loss: 0.0420052712990178
          vf_explained_var: 0.10150045901536942
          vf_loss: 0.01222234812254707
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,519,22247.6,519000,-2.5358,-2.08,-3.13,253.58




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-24_22-42-51
  done: false
  episode_len_mean: 252.13
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.52129999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1892
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.310708370267946e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.341243150167995
          entropy_coeff: 0.009999999999999998
          kl: 0.004709019022689789
          policy_loss: -0.026707534078094695
          total_loss: -0.0172129443122281
          vf_explained_var: 0.1658085435628891
          vf_loss: 0.012907021761768394
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,520,22314.1,520000,-2.5213,-2.06,-3.13,252.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-24_22-43-36
  done: false
  episode_len_mean: 251.47
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.51469999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1896
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.46550685266653696
          entropy_coeff: 0.009999999999999998
          kl: 0.010258215619682669
          policy_loss: -0.09139904512299432
          total_loss: -0.08132741492655542
          vf_explained_var: 0.1686014086008072
          vf_loss: 0.014726700261235237
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,521,22358.9,521000,-2.5147,-2.06,-3.13,251.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-24_22-44-23
  done: false
  episode_len_mean: 249.65
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.49649999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1901
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3011270029677285
          entropy_coeff: 0.009999999999999998
          kl: 0.002851738638475333
          policy_loss: -0.0004210866987705231
          total_loss: 0.012297589745786455
          vf_explained_var: 0.15957927703857422
          vf_loss: 0.015729945432394744
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,522,22405.4,522000,-2.4965,-2.06,-3.13,249.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-24_22-45-09
  done: false
  episode_len_mean: 248.52
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.485199999999991
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1905
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.276770925669865e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3432139472828971
          entropy_coeff: 0.009999999999999998
          kl: 0.007365104202768554
          policy_loss: 0.028869554069307114
          total_loss: 0.03867455940279696
          vf_explained_var: 0.1425526887178421
          vf_loss: 0.013237144011590216
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,523,22452.3,523000,-2.4852,-2.06,-3.13,248.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-24_22-45-56
  done: false
  episode_len_mean: 247.49
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.474899999999991
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1909
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.276770925669865e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.24367382046249178
          entropy_coeff: 0.009999999999999998
          kl: 0.002465896369352648
          policy_loss: -0.005681008679999246
          total_loss: 0.005474317073822022
          vf_explained_var: 0.10860683768987656
          vf_loss: 0.013592066677908103
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,524,22498.9,524000,-2.4749,-2.06,-3.13,247.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-24_22-46-43
  done: false
  episode_len_mean: 245.58
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4557999999999907
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1914
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6383854628349324e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.2446010536617703
          entropy_coeff: 0.009999999999999998
          kl: 0.0031308180706051962
          policy_loss: -0.01893089728222953
          total_loss: -0.0041314836177561015
          vf_explained_var: 0.11925739794969559
          vf_loss: 0.017245426442888047
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,525,22546,525000,-2.4558,-2.06,-3.13,245.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-24_22-47-29
  done: false
  episode_len_mean: 244.01
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4400999999999917
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1918
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.191927314174662e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.29791978382401996
          entropy_coeff: 0.009999999999999998
          kl: 0.004385447039499013
          policy_loss: 0.02808730767832862
          total_loss: 0.039155669179227616
          vf_explained_var: 0.10291340947151184
          vf_loss: 0.01404755904028813
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,526,22591.4,526000,-2.4401,-2.06,-3.13,244.01




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-24_22-48-35
  done: false
  episode_len_mean: 242.67
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.426699999999992
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1922
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.095963657087331e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3497755855321884
          entropy_coeff: 0.009999999999999998
          kl: 0.008654411867812408
          policy_loss: -0.02921330796347724
          total_loss: -0.018692123310433493
          vf_explained_var: 0.14859552681446075
          vf_loss: 0.014018938079890278
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,527,22657.6,527000,-2.4267,-2.06,-3.13,242.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-24_22-49-21
  done: false
  episode_len_mean: 241.82
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.418199999999992
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1926
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.095963657087331e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.37158913281228806
          entropy_coeff: 0.009999999999999998
          kl: 0.006204994611642078
          policy_loss: -0.09566010741723908
          total_loss: -0.08324980330136088
          vf_explained_var: 0.16454213857650757
          vf_loss: 0.016126196013970508
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,528,22703.4,528000,-2.4182,-2.06,-3.13,241.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-24_22-50-05
  done: false
  episode_len_mean: 241.23
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.412299999999992
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1930
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.095963657087331e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.48416972822613186
          entropy_coeff: 0.009999999999999998
          kl: 0.007322707370054138
          policy_loss: -0.10759861452711952
          total_loss: -0.0970802911453777
          vf_explained_var: 0.28513526916503906
          vf_loss: 0.015360019883761804
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,529,22747.9,529000,-2.4123,-2.06,-3.13,241.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-24_22-50-50
  done: false
  episode_len_mean: 240.11
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.401099999999993
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1935
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.095963657087331e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6358309404717551
          entropy_coeff: 0.009999999999999998
          kl: 0.05257777762221375
          policy_loss: -0.0033694002363416882
          total_loss: 0.0059541570643583935
          vf_explained_var: 0.2103455662727356
          vf_loss: 0.015681865480211048
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,530,22793.1,530000,-2.4011,-2.06,-3.13,240.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-24_22-51-32
  done: false
  episode_len_mean: 240.45
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4044999999999925
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1939
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9121619323889415
          entropy_coeff: 0.009999999999999998
          kl: 0.015382860095506759
          policy_loss: 0.019931614812877443
          total_loss: 0.020213376813464693
          vf_explained_var: 0.2398354858160019
          vf_loss: 0.009403382947978874
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,531,22834.3,531000,-2.4045,-2.06,-3.13,240.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-24_22-52-13
  done: false
  episode_len_mean: 241.01
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4100999999999924
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 1942
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9593555145793491
          entropy_coeff: 0.009999999999999998
          kl: 0.014006162440733761
          policy_loss: -0.038168421553240885
          total_loss: -0.037634592337740795
          vf_explained_var: 0.11581408977508545
          vf_loss: 0.010127383506753379
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,532,22875.8,532000,-2.4101,-2.06,-3.13,241.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-24_22-52-56
  done: false
  episode_len_mean: 240.64
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4063999999999925
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 1946
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9008644183476766
          entropy_coeff: 0.009999999999999998
          kl: 0.011286117227552002
          policy_loss: -0.04025548067357805
          total_loss: -0.03725387106339137
          vf_explained_var: 0.1270010769367218
          vf_loss: 0.012010253013836012
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,533,22918.7,533000,-2.4064,-2.06,-3.01,240.64




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-24_22-53-55
  done: false
  episode_len_mean: 240.37
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4036999999999926
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 1950
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9091581814818912
          entropy_coeff: 0.009999999999999998
          kl: 0.011947475558104277
          policy_loss: -0.033902221586969164
          total_loss: -0.03011237337357468
          vf_explained_var: 0.09646127372980118
          vf_loss: 0.01288143215287063
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,534,22977.1,534000,-2.4037,-2.06,-3.01,240.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-24_22-54-40
  done: false
  episode_len_mean: 240.86
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4085999999999927
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 1954
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9141907023058997
          entropy_coeff: 0.009999999999999998
          kl: 0.00790141703871004
          policy_loss: 0.014209285461240345
          total_loss: 0.017911882532967462
          vf_explained_var: 0.10592835396528244
          vf_loss: 0.012844504374596808
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,535,23022.4,535000,-2.4086,-2.06,-3.01,240.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-24_22-55-23
  done: false
  episode_len_mean: 241.18
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4117999999999924
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 1958
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8821837186813355
          entropy_coeff: 0.009999999999999998
          kl: 0.01798059025229135
          policy_loss: 0.014440488815307618
          total_loss: 0.017962521976894803
          vf_explained_var: 0.15581746399402618
          vf_loss: 0.012343870662152766
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,536,23065.4,536000,-2.4118,-2.06,-2.84,241.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-24_22-56-03
  done: false
  episode_len_mean: 242.86
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4285999999999923
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1962
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0186113946967654
          entropy_coeff: 0.009999999999999998
          kl: 0.009014969129789562
          policy_loss: -0.009182908965481653
          total_loss: -0.006170251551601622
          vf_explained_var: 0.1494085043668747
          vf_loss: 0.01319877521859275
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,537,23105.7,537000,-2.4286,-2.06,-2.91,242.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-24_22-56-45
  done: false
  episode_len_mean: 243.69
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4368999999999925
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 3
  episodes_total: 1965
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.919423849052853
          entropy_coeff: 0.009999999999999998
          kl: 0.00580754180445832
          policy_loss: -0.044108274827400845
          total_loss: -0.04265765125552813
          vf_explained_var: 0.09610707312822342
          vf_loss: 0.010644860917495357
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,538,23147.7,538000,-2.4369,-2.06,-2.91,243.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-24_22-57-25
  done: false
  episode_len_mean: 245.04
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.450399999999992
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1969
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9137639019224378
          entropy_coeff: 0.009999999999999998
          kl: 0.007926903079022276
          policy_loss: 0.014188030983010929
          total_loss: 0.018539772596624163
          vf_explained_var: 0.08688273280858994
          vf_loss: 0.013489382072455354
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,539,23187.7,539000,-2.4504,-2.06,-2.91,245.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-24_22-58-07
  done: false
  episode_len_mean: 245.97
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.459699999999991
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 1973
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0067180924945407
          entropy_coeff: 0.009999999999999998
          kl: 0.007170234163263937
          policy_loss: 0.019610421359539033
          total_loss: 0.022741580920086966
          vf_explained_var: 0.11895006895065308
          vf_loss: 0.013198343633363644
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,540,23229.2,540000,-2.4597,-2.06,-2.91,245.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-24_22-58-47
  done: false
  episode_len_mean: 247.31
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4730999999999908
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 3
  episodes_total: 1976
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0705630136860742
          entropy_coeff: 0.009999999999999998
          kl: 0.01774413681489209
          policy_loss: -0.10020213094022538
          total_loss: -0.09751460519101884
          vf_explained_var: 0.16854174435138702
          vf_loss: 0.013393155133558644
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,541,23269.6,541000,-2.4731,-2.06,-3.14,247.31




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-24_22-59-40
  done: false
  episode_len_mean: 248.8
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4879999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1980
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2463079783651563
          entropy_coeff: 0.009999999999999998
          kl: 0.03972716792449919
          policy_loss: 0.014619572957356771
          total_loss: 0.015499141315619152
          vf_explained_var: 0.1941225528717041
          vf_loss: 0.013342646726717552
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,542,23322.6,542000,-2.488,-2.06,-3.26,248.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-24_23-00-24
  done: false
  episode_len_mean: 250.25
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.50249999999999
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1984
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1784699731402928
          entropy_coeff: 0.009999999999999998
          kl: 0.008883821959477463
          policy_loss: -0.005014852434396744
          total_loss: -0.004520777074827088
          vf_explained_var: 0.19670575857162476
          vf_loss: 0.012278772973352009
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,543,23365.9,543000,-2.5025,-2.06,-3.26,250.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-24_23-01-06
  done: false
  episode_len_mean: 251.46
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.51459999999999
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1987
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.048403831322988
          entropy_coeff: 0.009999999999999998
          kl: 0.007862412630312576
          policy_loss: 0.016959595763021047
          total_loss: 0.01579103014535374
          vf_explained_var: 0.18771198391914368
          vf_loss: 0.009315473953675893
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,544,23407.9,544000,-2.5146,-2.06,-3.26,251.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-24_23-01-47
  done: false
  episode_len_mean: 253.06
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.53059999999999
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1991
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9732821736070845
          entropy_coeff: 0.009999999999999998
          kl: 0.00652675504294938
          policy_loss: 0.01158580022553603
          total_loss: 0.015166665779219733
          vf_explained_var: 0.16786955296993256
          vf_loss: 0.013313689155297147
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,545,23449.3,545000,-2.5306,-2.1,-3.26,253.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-24_23-02-28
  done: false
  episode_len_mean: 253.92
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5391999999999895
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1995
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9558296567863889
          entropy_coeff: 0.009999999999999998
          kl: 0.008156507199311767
          policy_loss: 0.014434191212058068
          total_loss: 0.018125263270404604
          vf_explained_var: 0.1771913319826126
          vf_loss: 0.013249366119917895
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 54600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,546,23490.3,546000,-2.5392,-2.1,-3.26,253.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-24_23-03-10
  done: false
  episode_len_mean: 255.15
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5514999999999897
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 1999
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8189334445529514
          entropy_coeff: 0.009999999999999998
          kl: 0.009071652090964399
          policy_loss: -0.015297026518318388
          total_loss: -0.010157804522249434
          vf_explained_var: 0.15743854641914368
          vf_loss: 0.013328555029713446
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,547,23532.3,547000,-2.5515,-2.1,-3.26,255.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-24_23-03-50
  done: false
  episode_len_mean: 256.07
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.560699999999989
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 2002
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8941500518057082
          entropy_coeff: 0.009999999999999998
          kl: 0.009462913458315509
          policy_loss: -0.030330782383680345
          total_loss: -0.028844462500678167
          vf_explained_var: 0.179536834359169
          vf_loss: 0.010427821970855197
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 54800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,548,23572,548000,-2.5607,-2.1,-3.26,256.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-24_23-04-30
  done: false
  episode_len_mean: 257.52
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.575199999999989
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2006
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7977168195777469
          entropy_coeff: 0.009999999999999998
          kl: 0.008867723395584113
          policy_loss: 0.030432351099120247
          total_loss: 0.03430936907728513
          vf_explained_var: 0.17997361719608307
          vf_loss: 0.011854181847431594
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,549,23612.3,549000,-2.5752,-2.1,-3.26,257.52




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-24_23-05-31
  done: false
  episode_len_mean: 258.44
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5843999999999894
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2010
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7006777809725867
          entropy_coeff: 0.009999999999999998
          kl: 0.005575184013477812
          policy_loss: 0.036410932739575706
          total_loss: 0.040078577978743445
          vf_explained_var: 0.12805849313735962
          vf_loss: 0.01067441941704601
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 55000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,550,23672.9,550000,-2.5844,-2.1,-3.26,258.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-24_23-06-18
  done: false
  episode_len_mean: 259.08
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.590799999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2014
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6408466034465365
          entropy_coeff: 0.009999999999999998
          kl: 0.0061585344981959144
          policy_loss: 0.028055725826157465
          total_loss: 0.036539605425463785
          vf_explained_var: 0.08609545975923538
          vf_loss: 0.014892345884193976
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 5510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,551,23720.1,551000,-2.5908,-2.1,-3.26,259.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-24_23-07-02
  done: false
  episode_len_mean: 260.06
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.600599999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2018
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7080310331450568
          entropy_coeff: 0.009999999999999998
          kl: 0.008123647298798886
          policy_loss: 0.012252914077705807
          total_loss: 0.019880845728847716
          vf_explained_var: 0.0824948251247406
          vf_loss: 0.014708242037643988
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,552,23763.6,552000,-2.6006,-2.1,-3.26,260.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-24_23-07-47
  done: false
  episode_len_mean: 260.63
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.606299999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2022
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6682581451204088
          entropy_coeff: 0.009999999999999998
          kl: 0.005769599646428247
          policy_loss: 0.019256359214584032
          total_loss: 0.02719913154012627
          vf_explained_var: 0.10135114192962646
          vf_loss: 0.014625352383073833
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 55300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,553,23808.5,553000,-2.6063,-2.28,-3.26,260.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-24_23-08-31
  done: false
  episode_len_mean: 261.05
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.610499999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2026
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8398123304049174
          entropy_coeff: 0.009999999999999998
          kl: 0.021277912278885737
          policy_loss: 0.03293826033671697
          total_loss: 0.03792910112275018
          vf_explained_var: 0.17612621188163757
          vf_loss: 0.013388962960905498
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,554,23852.6,554000,-2.6105,-2.28,-3.26,261.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-24_23-09-16
  done: false
  episode_len_mean: 261.45
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.614499999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2030
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.702008475197686
          entropy_coeff: 0.009999999999999998
          kl: 0.015722873483692487
          policy_loss: 0.018235992184943622
          total_loss: 0.026148309434453645
          vf_explained_var: 0.11552257835865021
          vf_loss: 0.014932402368221019
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 5550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,555,23897.6,555000,-2.6145,-2.28,-3.26,261.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-24_23-09-58
  done: false
  episode_len_mean: 262.07
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.620699999999988
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 2034
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8826645917362637
          entropy_coeff: 0.009999999999999998
          kl: 0.013821520519484822
          policy_loss: 0.011462182386053933
          total_loss: 0.016800526819295353
          vf_explained_var: 0.1888117641210556
          vf_loss: 0.014164989814162254
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 55600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,556,23939.6,556000,-2.6207,-2.3,-3.26,262.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-24_23-10-39
  done: false
  episode_len_mean: 263.81
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.666399999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 3
  episodes_total: 2037
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3861972596910266
          entropy_coeff: 0.009999999999999998
          kl: 0.09099347267034928
          policy_loss: -0.0006532150010267894
          total_loss: 0.09938773777749803
          vf_explained_var: 0.43996769189834595
          vf_loss: 0.11390293012890551
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,557,23981,557000,-2.6664,-2.3,-7,263.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-24_23-11-41
  done: false
  episode_len_mean: 264.14
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.6696999999999877
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2041
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0735816014004617e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6515585038397047
          entropy_coeff: 0.009999999999999998
          kl: 0.03925054668513614
          policy_loss: -0.03559054239756531
          total_loss: -0.036432463427384695
          vf_explained_var: 0.3504369854927063
          vf_loss: 0.015673666059349976
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 55800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,558,24042.9,558000,-2.6697,-2.3,-7,264.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-24_23-12-29
  done: false
  episode_len_mean: 263.27
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.6609999999999876
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2045
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7399211910035876
          entropy_coeff: 0.009999999999999998
          kl: 0.009693303426637476
          policy_loss: 0.021307427684466043
          total_loss: 0.025616418487495848
          vf_explained_var: 0.18756769597530365
          vf_loss: 0.011708200394382907
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 55900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,559,24090.9,559000,-2.661,-2.3,-7,263.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-24_23-13-16
  done: false
  episode_len_mean: 262.86
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.6568999999999874
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2049
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8406877875328064
          entropy_coeff: 0.009999999999999998
          kl: 0.019809363532051254
          policy_loss: 0.019299245791302787
          total_loss: 0.02472769808438089
          vf_explained_var: 0.21938909590244293
          vf_loss: 0.013835331569943163
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,560,24137.4,560000,-2.6569,-2.3,-7,262.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-24_23-13-59
  done: false
  episode_len_mean: 263.63
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.693199999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2053
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2671904411580828
          entropy_coeff: 0.009999999999999998
          kl: 0.0687859281079947
          policy_loss: -0.03469432070851326
          total_loss: 0.09716731131904655
          vf_explained_var: 0.4496907889842987
          vf_loss: 0.14453354122945003
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,561,24181,561000,-2.6932,-2.34,-7,263.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-24_23-14-43
  done: false
  episode_len_mean: 264.39
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.700799999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 3
  episodes_total: 2056
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5194574462042914
          entropy_coeff: 0.009999999999999998
          kl: 0.02509664988943903
          policy_loss: -0.12325769315163294
          total_loss: -0.10923034763998456
          vf_explained_var: -0.028500720858573914
          vf_loss: 0.02922192013098134
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,562,24224.8,562000,-2.7008,-2.34,-7,264.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-24_23-15-24
  done: false
  episode_len_mean: 266.44
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.721299999999987
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 3
  episodes_total: 2059
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.998337904726558e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9176152706146241
          entropy_coeff: 0.009999999999999998
          kl: 0.025459406246096074
          policy_loss: 0.01842928363217248
          total_loss: 0.011038156184885238
          vf_explained_var: 0.4731767475605011
          vf_loss: 0.011785026740593214
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,563,24265.9,563000,-2.7213,-2.34,-7,266.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-24_23-16-03
  done: false
  episode_len_mean: 267.43
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7311999999999865
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2063
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0497506857089838e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4577517482969495
          entropy_coeff: 0.009999999999999998
          kl: 0.02950394041477871
          policy_loss: -0.026433212475644216
          total_loss: -0.029878752844201193
          vf_explained_var: 0.6300923824310303
          vf_loss: 0.01113197781249053
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 56400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,564,24304.2,564000,-2.7312,-2.34,-7,267.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-24_23-16-49
  done: false
  episode_len_mean: 266.79
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7247999999999863
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2067
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9770459883742862
          entropy_coeff: 0.009999999999999998
          kl: 0.00957317298914712
          policy_loss: -0.018444093730714588
          total_loss: -0.01836279034614563
          vf_explained_var: 0.6080458164215088
          vf_loss: 0.009851761379589636
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 56500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,565,24350.5,565000,-2.7248,-2.34,-7,266.79




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-24_23-17-53
  done: false
  episode_len_mean: 265.89
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.715799999999987
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2071
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0082285079691145
          entropy_coeff: 0.009999999999999998
          kl: 0.010242005475612517
          policy_loss: -0.010983074373669095
          total_loss: -0.011821160382694668
          vf_explained_var: 0.6598118543624878
          vf_loss: 0.009244196706761916
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 5660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,566,24414.1,566000,-2.7158,-2.15,-7,265.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-24_23-18-39
  done: false
  episode_len_mean: 265.06
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7074999999999876
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2075
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9738433433903588
          entropy_coeff: 0.009999999999999998
          kl: 0.058006956891298755
          policy_loss: 0.0014862883422109817
          total_loss: 0.0012418978330161836
          vf_explained_var: 0.5965041518211365
          vf_loss: 0.009494041941232151
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,567,24460.6,567000,-2.7075,-2.15,-7,265.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-24_23-19-24
  done: false
  episode_len_mean: 263.53
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6921999999999877
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2079
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3619390428452134e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1306023783153958
          entropy_coeff: 0.009999999999999998
          kl: 0.06669395010745044
          policy_loss: -0.044284687605169085
          total_loss: -0.047362427330679364
          vf_explained_var: 0.6356372237205505
          vf_loss: 0.008228282940884431
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 5680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,568,24505.2,568000,-2.6922,-2.15,-7,263.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-24_23-20-03
  done: false
  episode_len_mean: 263.81
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.694999999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 3
  episodes_total: 2082
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2739820334646437
          entropy_coeff: 0.009999999999999998
          kl: 0.01925622423611186
          policy_loss: 0.04628145032458835
          total_loss: 0.040782648821671805
          vf_explained_var: 0.6423513889312744
          vf_loss: 0.007241020457068872
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,569,24544.9,569000,-2.695,-2.15,-7,263.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-24_23-20-44
  done: false
  episode_len_mean: 264.45
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7013999999999867
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2086
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1089300023184883
          entropy_coeff: 0.009999999999999998
          kl: 0.01322619752135918
          policy_loss: -0.009092849989732107
          total_loss: -0.009064928028318616
          vf_explained_var: 0.23134343326091766
          vf_loss: 0.011117221332258648
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,570,24585.6,570000,-2.7014,-2.15,-7,264.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-24_23-21-27
  done: false
  episode_len_mean: 264.29
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6997999999999873
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 3
  episodes_total: 2089
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8226385123199886
          entropy_coeff: 0.009999999999999998
          kl: 0.021684596999063645
          policy_loss: -0.11446145938502418
          total_loss: -0.11137194857001305
          vf_explained_var: 0.17912237346172333
          vf_loss: 0.01131589182962974
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 57100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,571,24628.5,571000,-2.6998,-2.15,-7,264.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-24_23-22-11
  done: false
  episode_len_mean: 263.82
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6950999999999876
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2093
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7500237206617991
          entropy_coeff: 0.009999999999999998
          kl: 0.010868381925429717
          policy_loss: -0.050237521073884434
          total_loss: -0.04570789150893688
          vf_explained_var: 0.17428867518901825
          vf_loss: 0.012029865580714411
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 57200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,572,24672.8,572000,-2.6951,-2.15,-7,263.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-24_23-22-55
  done: false
  episode_len_mean: 263.18
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6886999999999883
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2097
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6184045030011072
          entropy_coeff: 0.009999999999999998
          kl: 0.008333141019085923
          policy_loss: -0.07457018229696485
          total_loss: -0.0682753533952766
          vf_explained_var: 0.15052799880504608
          vf_loss: 0.01247887397184968
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,573,24716.4,573000,-2.6887,-2.15,-7,263.18




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-24_23-23-58
  done: false
  episode_len_mean: 261.78
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6746999999999876
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 5
  episodes_total: 2102
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6178062723742591
          entropy_coeff: 0.009999999999999998
          kl: 0.008792724184423012
          policy_loss: -0.012077255960967806
          total_loss: -0.0028723483284314472
          vf_explained_var: 0.15815065801143646
          vf_loss: 0.015382969824390278
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 57

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,574,24779,574000,-2.6747,-2.14,-7,261.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-24_23-24-44
  done: false
  episode_len_mean: 260.64
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6632999999999885
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2106
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7177839345402188
          entropy_coeff: 0.009999999999999998
          kl: 0.014999287144843582
          policy_loss: 0.008076723002725177
          total_loss: 0.011433319416311052
          vf_explained_var: 0.28954100608825684
          vf_loss: 0.010534433647990227
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 57500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,575,24825.6,575000,-2.6633,-2.14,-7,260.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-24_23-25-28
  done: false
  episode_len_mean: 260.52
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6620999999999877
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2110
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6749460650814905
          entropy_coeff: 0.009999999999999998
          kl: 0.012119733771854158
          policy_loss: 0.017597996691862742
          total_loss: 0.021728189537922542
          vf_explained_var: 0.26594239473342896
          vf_loss: 0.010879653868161969
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 57600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,576,24869.4,576000,-2.6621,-2.14,-7,260.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-24_23-26-15
  done: false
  episode_len_mean: 260.42
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6610999999999883
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2114
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6159564793109894
          entropy_coeff: 0.009999999999999998
          kl: 0.023956855376159245
          policy_loss: 0.02345388283332189
          total_loss: 0.028242619584004085
          vf_explained_var: 0.2550584077835083
          vf_loss: 0.010948300847990646
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,577,24915.8,577000,-2.6611,-2.14,-7,260.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-24_23-27-00
  done: false
  episode_len_mean: 259.99
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.656799999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2118
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.971544269602596e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5387402974896961
          entropy_coeff: 0.009999999999999998
          kl: 0.003469096430919194
          policy_loss: 0.03871689583692286
          total_loss: 0.04467590645783477
          vf_explained_var: 0.21529941260814667
          vf_loss: 0.011346412801908122
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,578,24961,578000,-2.6568,-2.14,-7,259.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-24_23-27-46
  done: false
  episode_len_mean: 259.68
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.653699999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2122
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.985772134801298e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4962181525097953
          entropy_coeff: 0.009999999999999998
          kl: 0.0046559915127471815
          policy_loss: 0.03785874868432681
          total_loss: 0.04355157795879576
          vf_explained_var: 0.173823744058609
          vf_loss: 0.010655013621888226
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,579,25007,579000,-2.6537,-2.14,-7,259.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-24_23-28-32
  done: false
  episode_len_mean: 259.35
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.650399999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2126
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.992886067400649e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6099723617235819
          entropy_coeff: 0.009999999999999998
          kl: 0.013268783725602537
          policy_loss: -0.039385518597231974
          total_loss: -0.031787145510315896
          vf_explained_var: 0.10392607748508453
          vf_loss: 0.013698096490568586
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,580,25053.5,580000,-2.6504,-2.14,-7,259.35




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-24_23-29-35
  done: false
  episode_len_mean: 258.82
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.645099999999988
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 4
  episodes_total: 2130
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.992886067400649e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4533909797668457
          entropy_coeff: 0.009999999999999998
          kl: 0.009890502790556057
          policy_loss: -0.11111087765958574
          total_loss: -0.09878176483843061
          vf_explained_var: 0.16276176273822784
          vf_loss: 0.01686302558001545
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,581,25116,581000,-2.6451,-2.14,-7,258.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-24_23-30-19
  done: false
  episode_len_mean: 257.96
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.636499999999989
  episode_reward_min: -6.999999999999952
  episodes_this_iter: 5
  episodes_total: 2135
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.992886067400649e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5004027906391356
          entropy_coeff: 0.009999999999999998
          kl: 0.0036869062452922208
          policy_loss: -0.0004863669474919637
          total_loss: 0.008199626869625515
          vf_explained_var: 0.18393206596374512
          vf_loss: 0.01369002220324344
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,582,25160,582000,-2.6365,-2.14,-7,257.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-24_23-31-05
  done: false
  episode_len_mean: 254.9
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5775999999999892
  episode_reward_min: -6.169999999999957
  episodes_this_iter: 4
  episodes_total: 2139
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.964430337003245e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4491220027208328
          entropy_coeff: 0.009999999999999998
          kl: 0.004084463017226033
          policy_loss: 0.023707883059978486
          total_loss: 0.032486105461915334
          vf_explained_var: 0.1662304848432541
          vf_loss: 0.013269442475090425
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,583,25205.5,583000,-2.5776,-2.14,-6.17,254.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-24_23-31-49
  done: false
  episode_len_mean: 254.84
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5769999999999893
  episode_reward_min: -6.169999999999957
  episodes_this_iter: 4
  episodes_total: 2143
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.982215168501623e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5202462557289336
          entropy_coeff: 0.009999999999999998
          kl: 0.005683241708131466
          policy_loss: 0.01805381460322274
          total_loss: 0.02617438534895579
          vf_explained_var: 0.21383412182331085
          vf_loss: 0.013323034346103668
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,584,25250.2,584000,-2.577,-2.14,-6.17,254.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-24_23-32-34
  done: false
  episode_len_mean: 254.65
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5750999999999897
  episode_reward_min: -6.169999999999957
  episodes_this_iter: 4
  episodes_total: 2147
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.982215168501623e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5323361933231354
          entropy_coeff: 0.009999999999999998
          kl: 0.004113278384072711
          policy_loss: 0.0334849298828178
          total_loss: 0.041027650237083435
          vf_explained_var: 0.20131778717041016
          vf_loss: 0.01286607887595892
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,585,25295.4,585000,-2.5751,-2.14,-6.17,254.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-24_23-33-21
  done: false
  episode_len_mean: 253.54
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.53539999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 2151
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4911075842508113e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5176955908536911
          entropy_coeff: 0.009999999999999998
          kl: 0.007711409246696929
          policy_loss: -0.007431618496775627
          total_loss: -0.00034050982859399584
          vf_explained_var: 0.22406403720378876
          vf_loss: 0.012268065599103768
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,586,25341.9,586000,-2.5354,-2.14,-3.56,253.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-24_23-34-05
  done: false
  episode_len_mean: 252.62
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52619999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 2155
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4911075842508113e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6870177063677047
          entropy_coeff: 0.009999999999999998
          kl: 0.00639362138189268
          policy_loss: -0.05223656520247459
          total_loss: -0.04661749742097325
          vf_explained_var: 0.2744450569152832
          vf_loss: 0.012489243958973223
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,587,25385.5,587000,-2.5262,-2.14,-3.56,252.62




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-24_23-35-07
  done: false
  episode_len_mean: 248.79
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.487899999999991
  episode_reward_min: -3.149999999999977
  episodes_this_iter: 5
  episodes_total: 2160
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4911075842508113e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5435088535149892
          entropy_coeff: 0.009999999999999998
          kl: 0.004127052640594542
          policy_loss: 0.03319583473106225
          total_loss: 0.04105223384168413
          vf_explained_var: 0.22360551357269287
          vf_loss: 0.013291487066696087
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,588,25448.1,588000,-2.4879,-2.14,-3.15,248.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-24_23-35-49
  done: false
  episode_len_mean: 247.63
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.476299999999991
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 4
  episodes_total: 2164
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2455537921254057e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6677720440758599
          entropy_coeff: 0.009999999999999998
          kl: 0.004009035067751891
          policy_loss: -0.01607223567035463
          total_loss: -0.010681091414557562
          vf_explained_var: 0.3360413610935211
          vf_loss: 0.01206886913213465
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 5890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,589,25489.9,589000,-2.4763,-2.14,-3.08,247.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-24_23-36-34
  done: false
  episode_len_mean: 247.47
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.474699999999991
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 4
  episodes_total: 2168
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.227768960627028e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6353663802146912
          entropy_coeff: 0.009999999999999998
          kl: 0.007006166660573459
          policy_loss: -0.0017825851009951699
          total_loss: 0.003945984691381454
          vf_explained_var: 0.2704166769981384
          vf_loss: 0.012082233155767123
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,590,25534.8,590000,-2.4747,-2.14,-3.08,247.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-24_23-37-18
  done: false
  episode_len_mean: 248.07
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.480699999999991
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 3
  episodes_total: 2171
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.227768960627028e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6500547356075711
          entropy_coeff: 0.009999999999999998
          kl: 0.010982808493501908
          policy_loss: -0.10669789355662133
          total_loss: -0.10119034142957793
          vf_explained_var: 0.2650107741355896
          vf_loss: 0.012008097374604808
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 59100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,591,25578.8,591000,-2.4807,-2.14,-3.08,248.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-24_23-38-02
  done: false
  episode_len_mean: 248.26
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.482599999999991
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 4
  episodes_total: 2175
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.227768960627028e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6012316134240893
          entropy_coeff: 0.009999999999999998
          kl: 0.008547987953225089
          policy_loss: -0.03206887493530909
          total_loss: -0.02530560601088736
          vf_explained_var: 0.23156236112117767
          vf_loss: 0.012775584279249111
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 5920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,592,25622.4,592000,-2.4826,-2.14,-3.08,248.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-24_23-38-48
  done: false
  episode_len_mean: 247.72
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.477199999999991
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 4
  episodes_total: 2179
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.227768960627028e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.44933050870895386
          entropy_coeff: 0.009999999999999998
          kl: 0.004380451739760834
          policy_loss: -0.12162304694453875
          total_loss: -0.1099165706998772
          vf_explained_var: 0.18326488137245178
          vf_loss: 0.016199781818108425
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 5930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,593,25669.2,593000,-2.4772,-2.14,-3.08,247.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-24_23-39-34
  done: false
  episode_len_mean: 245.23
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4522999999999913
  episode_reward_min: -2.7899999999999845
  episodes_this_iter: 5
  episodes_total: 2184
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3378790772623486
          entropy_coeff: 0.009999999999999998
          kl: 0.005239392607849661
          policy_loss: 0.01128382682800293
          total_loss: 0.02074189161260923
          vf_explained_var: 0.17713330686092377
          vf_loss: 0.01283685758502947
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,594,25715.1,594000,-2.4523,-2.14,-2.79,245.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-24_23-40-20
  done: false
  episode_len_mean: 243.92
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.439199999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2188
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3279635853237576
          entropy_coeff: 0.009999999999999998
          kl: 0.01193066524589944
          policy_loss: 0.023862524413400227
          total_loss: 0.03304692407449086
          vf_explained_var: 0.1207151934504509
          vf_loss: 0.012464034267597728
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,595,25760.6,595000,-2.4392,-2.14,-2.73,243.92




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-24_23-41-24
  done: false
  episode_len_mean: 243.13
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.431299999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2192
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4606738282574548
          entropy_coeff: 0.009999999999999998
          kl: 0.008642241870223997
          policy_loss: -0.006823233721984757
          total_loss: 0.001595302340057161
          vf_explained_var: 0.23765429854393005
          vf_loss: 0.013025276963081625
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 5960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,596,25824.6,596000,-2.4313,-2.01,-2.73,243.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-24_23-42-06
  done: false
  episode_len_mean: 242.94
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4293999999999922
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2196
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.43791659110122255
          entropy_coeff: 0.009999999999999998
          kl: 0.008477725057898056
          policy_loss: -0.007994048131836785
          total_loss: 0.0019964681731330022
          vf_explained_var: 0.09835688024759293
          vf_loss: 0.014369679987430573
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,597,25866.4,597000,-2.4294,-2.01,-2.73,242.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-24_23-42-50
  done: false
  episode_len_mean: 243.25
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.432499999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2200
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5531041989723842
          entropy_coeff: 0.009999999999999998
          kl: 0.006529120624930505
          policy_loss: -0.04270856206615766
          total_loss: -0.03432413695587052
          vf_explained_var: 0.1576341986656189
          vf_loss: 0.01391546975614296
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,598,25910.4,598000,-2.4325,-2.01,-2.73,243.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-24_23-43-35
  done: false
  episode_len_mean: 243.41
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.434099999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2204
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.113884480313514e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.49884206818209753
          entropy_coeff: 0.009999999999999998
          kl: 0.003856800492282433
          policy_loss: -0.0236731033358309
          total_loss: -0.015508164134290483
          vf_explained_var: 0.19253861904144287
          vf_loss: 0.013153365150921874
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 5990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,599,25955.7,599000,-2.4341,-2.01,-2.73,243.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-24_23-44-20
  done: false
  episode_len_mean: 243.34
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4333999999999922
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2208
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.556942240156757e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.48000548481941224
          entropy_coeff: 0.009999999999999998
          kl: 0.005247445201776715
          policy_loss: -0.08529692714413008
          total_loss: -0.07588838413357735
          vf_explained_var: 0.20167537033557892
          vf_loss: 0.014208596334275273
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,600,26000.9,600000,-2.4334,-2.01,-2.73,243.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-24_23-45-05
  done: false
  episode_len_mean: 243.1
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.430999999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 5
  episodes_total: 2213
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.556942240156757e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.49937540193398794
          entropy_coeff: 0.009999999999999998
          kl: 0.0062248468940659055
          policy_loss: -0.028483963716361256
          total_loss: -0.018045387168725332
          vf_explained_var: 0.21469584107398987
          vf_loss: 0.015432329362051354
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,601,26045.6,601000,-2.431,-2.01,-2.73,243.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-24_23-45-51
  done: false
  episode_len_mean: 242.9
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4289999999999923
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2217
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.556942240156757e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4695778979195489
          entropy_coeff: 0.009999999999999998
          kl: 0.004731594275505834
          policy_loss: 0.012073116583956612
          total_loss: 0.019196098463402853
          vf_explained_var: 0.2329898178577423
          vf_loss: 0.01181876061277257
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,602,26091.4,602000,-2.429,-2.01,-2.73,242.9




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-24_23-46-56
  done: false
  episode_len_mean: 242.86
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.428599999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2221
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.784711200783785e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4014207747247484
          entropy_coeff: 0.009999999999999998
          kl: 0.008857024579318503
          policy_loss: 0.02133136408196555
          total_loss: 0.030004687441719902
          vf_explained_var: 0.1545054167509079
          vf_loss: 0.012687530658311314
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,603,26156,603000,-2.4286,-2.01,-2.73,242.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-24_23-47-38
  done: false
  episode_len_mean: 242.87
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.428699999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2225
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.784711200783785e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.37529123028119404
          entropy_coeff: 0.009999999999999998
          kl: 0.004366653205341701
          policy_loss: 0.02566660245259603
          total_loss: 0.034705818278921975
          vf_explained_var: 0.09632817655801773
          vf_loss: 0.012792126927524804
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 60400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,604,26198.5,604000,-2.4287,-2.01,-2.73,242.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-24_23-48-25
  done: false
  episode_len_mean: 242.76
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.427599999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2229
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.892355600391893e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.3267552094327079
          entropy_coeff: 0.009999999999999998
          kl: 0.007653662182460942
          policy_loss: -0.018932697425285975
          total_loss: -0.008817730678452386
          vf_explained_var: 0.08385813236236572
          vf_loss: 0.013382520081682338
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,605,26245.1,605000,-2.4276,-2.01,-2.73,242.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-24_23-49-12
  done: false
  episode_len_mean: 242.77
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4276999999999926
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 5
  episodes_total: 2234
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.892355600391893e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.35960843563079836
          entropy_coeff: 0.009999999999999998
          kl: 0.012461287766073002
          policy_loss: -0.02412180875738462
          total_loss: -0.011022664854923885
          vf_explained_var: 0.12048608809709549
          vf_loss: 0.01669522628395094
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,606,26292.6,606000,-2.4277,-2.01,-2.73,242.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-24_23-49-56
  done: false
  episode_len_mean: 243.06
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.430599999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2238
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.892355600391893e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6842496941486994
          entropy_coeff: 0.009999999999999998
          kl: 0.04741272050036969
          policy_loss: 0.0002750244405534532
          total_loss: 0.005810405065615972
          vf_explained_var: 0.20799782872200012
          vf_loss: 0.01237787858893474
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,607,26336.3,607000,-2.4306,-2.01,-2.73,243.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-24_23-50-39
  done: false
  episode_len_mean: 243.8
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4379999999999917
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 3
  episodes_total: 2241
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6997785462273491
          entropy_coeff: 0.009999999999999998
          kl: 0.009736115986511053
          policy_loss: -0.07702564108702871
          total_loss: -0.0723534488843547
          vf_explained_var: 0.2049623280763626
          vf_loss: 0.011669979058206081
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,608,26379.2,608000,-2.438,-2.01,-2.73,243.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-24_23-51-21
  done: false
  episode_len_mean: 244.19
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4418999999999915
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2245
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6897434314092
          entropy_coeff: 0.009999999999999998
          kl: 0.009249856292090582
          policy_loss: -0.025184297727213964
          total_loss: -0.020052258835898507
          vf_explained_var: 0.2642543315887451
          vf_loss: 0.012029471558829148
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,609,26421.6,609000,-2.4419,-2.01,-2.73,244.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-24_23-52-04
  done: false
  episode_len_mean: 244.99
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4498999999999915
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2249
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6877888997395833
          entropy_coeff: 0.009999999999999998
          kl: 0.00690862569745813
          policy_loss: 0.0007985943721400367
          total_loss: 0.006082758473025428
          vf_explained_var: 0.22428953647613525
          vf_loss: 0.012162051856931713
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 6100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,610,26464.1,610000,-2.4499,-2.01,-2.73,244.99




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-24_23-53-08
  done: false
  episode_len_mean: 244.97
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4496999999999916
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2253
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5237614668077892
          entropy_coeff: 0.009999999999999998
          kl: 0.008189332296190502
          policy_loss: -0.02739490783876843
          total_loss: -0.020381553471088408
          vf_explained_var: 0.16808563470840454
          vf_loss: 0.012250968813896179
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,611,26528.7,611000,-2.4497,-2.01,-2.73,244.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-24_23-53-48
  done: false
  episode_len_mean: 244.93
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.449299999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2257
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4965304457479053
          entropy_coeff: 0.009999999999999998
          kl: 0.009003076581067089
          policy_loss: -0.04730055671599176
          total_loss: -0.039663920137617326
          vf_explained_var: 0.153157040476799
          vf_loss: 0.012601940696024232
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,612,26567.9,612000,-2.4493,-2.01,-2.73,244.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-24_23-54-34
  done: false
  episode_len_mean: 244.98
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4497999999999918
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 5
  episodes_total: 2262
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5222212384144466
          entropy_coeff: 0.009999999999999998
          kl: 0.005560997411329696
          policy_loss: -0.02467312953538365
          total_loss: -0.014068001260360083
          vf_explained_var: 0.12043169885873795
          vf_loss: 0.015827339608222246
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,613,26614.2,613000,-2.4498,-2.01,-2.73,244.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-24_23-55-20
  done: false
  episode_len_mean: 244.75
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4474999999999913
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2266
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5351452913549212
          entropy_coeff: 0.009999999999999998
          kl: 0.005519389044766222
          policy_loss: -0.008808504541714985
          total_loss: -0.0013212141063478258
          vf_explained_var: 0.17675794661045074
          vf_loss: 0.012838745551804701
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,614,26660,614000,-2.4475,-2.01,-2.73,244.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-24_23-56-03
  done: false
  episode_len_mean: 244.6
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.445999999999992
  episode_reward_min: -2.7299999999999858
  episodes_this_iter: 4
  episodes_total: 2270
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.471722459130817
          entropy_coeff: 0.009999999999999998
          kl: 0.0056112786043814155
          policy_loss: 0.015610444380177392
          total_loss: 0.02395616008175744
          vf_explained_var: 0.12629589438438416
          vf_loss: 0.013062944377048148
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,615,26703.4,615000,-2.446,-2.01,-2.73,244.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-24_23-56-49
  done: false
  episode_len_mean: 243.88
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4387999999999916
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2274
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.43397341171900433
          entropy_coeff: 0.009999999999999998
          kl: 0.005226469081196683
          policy_loss: 0.02020931099024084
          total_loss: 0.029662680584523413
          vf_explained_var: 0.09587499499320984
          vf_loss: 0.013793101078934139
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 6160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,616,26749.4,616000,-2.4388,-2.01,-2.7,243.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-24_23-57-35
  done: false
  episode_len_mean: 243.63
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.436299999999992
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2278
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5119518316454358
          entropy_coeff: 0.009999999999999998
          kl: 0.0057817122157419795
          policy_loss: 0.018842486499084366
          total_loss: 0.02771402924425072
          vf_explained_var: 0.10400362312793732
          vf_loss: 0.013991058410869704
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 61700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,617,26795.3,617000,-2.4363,-2.01,-2.7,243.63




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-24_23-58-39
  done: false
  episode_len_mean: 243.38
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4337999999999917
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2282
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5481977522373199
          entropy_coeff: 0.009999999999999998
          kl: 0.006023339413836566
          policy_loss: -0.027902707705895105
          total_loss: -0.019471437318457498
          vf_explained_var: 0.12764325737953186
          vf_loss: 0.013913250828368796
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 61

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,618,26858.9,618000,-2.4338,-2.01,-2.7,243.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-24_23-59-23
  done: false
  episode_len_mean: 243.82
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.438199999999992
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2286
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.838533400587836e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5445759905709161
          entropy_coeff: 0.009999999999999998
          kl: 0.004728643928106951
          policy_loss: -0.07833692787422074
          total_loss: -0.07044323028789626
          vf_explained_var: 0.16777852177619934
          vf_loss: 0.013339457474648953
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 61900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,619,26903.3,619000,-2.4382,-2.01,-2.7,243.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-25_00-00-05
  done: false
  episode_len_mean: 244.24
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.4423999999999917
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2290
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.919266700293918e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5298352171977361
          entropy_coeff: 0.009999999999999998
          kl: 0.005886307150267574
          policy_loss: -0.0886978834039635
          total_loss: -0.07858248154322306
          vf_explained_var: 0.16520439088344574
          vf_loss: 0.015413755830377341
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 6200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,620,26944.5,620000,-2.4424,-2.13,-2.7,244.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-25_00-00-53
  done: false
  episode_len_mean: 243.84
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.4383999999999917
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 5
  episodes_total: 2295
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.919266700293918e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.603259332312478
          entropy_coeff: 0.009999999999999998
          kl: 0.010066956158640592
          policy_loss: -0.013234977589713202
          total_loss: -0.004317644817961587
          vf_explained_var: 0.23610390722751617
          vf_loss: 0.014949926661534442
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,621,26993,621000,-2.4384,-2.13,-2.7,243.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-25_00-01-38
  done: false
  episode_len_mean: 243.8
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.4379999999999917
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2299
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.919266700293918e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7662873069445292
          entropy_coeff: 0.009999999999999998
          kl: 0.03544344926293322
          policy_loss: 0.03174356433252493
          total_loss: 0.03448319675193893
          vf_explained_var: 0.2766030728816986
          vf_loss: 0.010402505588717758
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,622,27037.8,622000,-2.438,-2.13,-2.7,243.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-25_00-02-23
  done: false
  episode_len_mean: 243.96
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.439599999999992
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2303
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8072673797607421
          entropy_coeff: 0.009999999999999998
          kl: 0.015641468101841995
          policy_loss: 0.01220167295800315
          total_loss: 0.01790595410598649
          vf_explained_var: 0.2278325855731964
          vf_loss: 0.013776953228645854
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,623,27082.4,623000,-2.4396,-2.13,-2.7,243.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-25_00-03-08
  done: false
  episode_len_mean: 244.09
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.440899999999992
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2307
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8353702949153052
          entropy_coeff: 0.009999999999999998
          kl: 0.018289159180723876
          policy_loss: 0.013305091112852097
          total_loss: 0.018402452684111066
          vf_explained_var: 0.2131614089012146
          vf_loss: 0.013451065868139267
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 6240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,624,27127.6,624000,-2.4409,-2.13,-2.7,244.09




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-25_00-04-09
  done: false
  episode_len_mean: 244.52
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.4451999999999914
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2311
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8047008143530952
          entropy_coeff: 0.009999999999999998
          kl: 0.010960465640298227
          policy_loss: -0.002803683943218655
          total_loss: 0.0023308551145924464
          vf_explained_var: 0.23242418467998505
          vf_loss: 0.013181547520475255
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,625,27188.8,625000,-2.4452,-2.13,-2.7,244.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-25_00-04-53
  done: false
  episode_len_mean: 244.92
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.449199999999992
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2315
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7050813972949982
          entropy_coeff: 0.009999999999999998
          kl: 0.009802166325155466
          policy_loss: 0.028607166641288335
          total_loss: 0.03343654001752536
          vf_explained_var: 0.10735044628381729
          vf_loss: 0.011880185796568792
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 6260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,626,27233.1,626000,-2.4492,-2.13,-2.7,244.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-25_00-05-37
  done: false
  episode_len_mean: 245.27
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.4526999999999917
  episode_reward_min: -2.6999999999999864
  episodes_this_iter: 4
  episodes_total: 2319
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7999066882663303
          entropy_coeff: 0.009999999999999998
          kl: 0.015288869350523586
          policy_loss: 0.030037395366364055
          total_loss: 0.03425873075094488
          vf_explained_var: 0.09018281102180481
          vf_loss: 0.01222040190671881
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 6270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,627,27276.7,627000,-2.4527,-2.13,-2.7,245.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-25_00-06-15
  done: false
  episode_len_mean: 246.36
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.463599999999991
  episode_reward_min: -2.7399999999999856
  episodes_this_iter: 3
  episodes_total: 2322
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3789000504408795e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9744798110591041
          entropy_coeff: 0.009999999999999998
          kl: 0.02488204924946014
          policy_loss: -0.09649151927895017
          total_loss: -0.09287275150418281
          vf_explained_var: 0.18476422131061554
          vf_loss: 0.013363567408588198
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 6280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,628,27315.2,628000,-2.4636,-2.13,-2.74,246.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-25_00-06-56
  done: false
  episode_len_mean: 247.62
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.476199999999991
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2326
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.568350075661319e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9947937342855665
          entropy_coeff: 0.009999999999999998
          kl: 0.028673940185417735
          policy_loss: 0.010953254790769683
          total_loss: 0.014927160408761766
          vf_explained_var: 0.2286733239889145
          vf_loss: 0.01392184119257662
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,629,27355.6,629000,-2.4762,-2.13,-2.8,247.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-25_00-07-37
  done: false
  episode_len_mean: 249.06
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.490599999999991
  episode_reward_min: -2.8299999999999836
  episodes_this_iter: 4
  episodes_total: 2330
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.852525113491974e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0525771074824863
          entropy_coeff: 0.009999999999999998
          kl: 0.030761351968167244
          policy_loss: 0.010852640701664818
          total_loss: 0.012685654560724895
          vf_explained_var: 0.3112689256668091
          vf_loss: 0.012358782026502821
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 63000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,630,27396.5,630000,-2.4906,-2.13,-2.83,249.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-25_00-08-15
  done: false
  episode_len_mean: 250.38
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5037999999999903
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 3
  episodes_total: 2333
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4778787670237964e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0788918342855243
          entropy_coeff: 0.009999999999999998
          kl: 0.023788352968506683
          policy_loss: 0.031408451663123234
          total_loss: 0.02971146081884702
          vf_explained_var: 0.5523468255996704
          vf_loss: 0.009091923733810998
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 63100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,631,27435,631000,-2.5038,-2.13,-2.91,250.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-25_00-08-58
  done: false
  episode_len_mean: 252.2
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.52199999999999
  episode_reward_min: -3.0599999999999787
  episodes_this_iter: 4
  episodes_total: 2337
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2168181505356943e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3435573246743944
          entropy_coeff: 0.009999999999999998
          kl: 0.04468230481654585
          policy_loss: -0.00913366327683131
          total_loss: -0.010738422887192832
          vf_explained_var: 0.5712653398513794
          vf_loss: 0.011830811750971608
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,632,27478.1,632000,-2.522,-2.13,-3.06,252.2




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-25_00-09-54
  done: false
  episode_len_mean: 253.04
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5303999999999895
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 2340
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.325227225803541e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4497721142239042
          entropy_coeff: 0.009999999999999998
          kl: 0.025073667580045842
          policy_loss: 0.017521077394485475
          total_loss: 0.011472456322775947
          vf_explained_var: 0.6908606290817261
          vf_loss: 0.008449095971364942
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 6330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,633,27533.5,633000,-2.5304,-2.13,-3.13,253.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-25_00-10-36
  done: false
  episode_len_mean: 254.2
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5419999999999896
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2344
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.065525119834476
          entropy_coeff: 0.009999999999999998
          kl: 0.015344267303925819
          policy_loss: -0.020721819168991514
          total_loss: -0.019917405106955104
          vf_explained_var: 0.6011172533035278
          vf_loss: 0.01145966827041573
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 63400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,634,27575.1,634000,-2.542,-2.13,-3.13,254.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-25_00-11-18
  done: false
  episode_len_mean: 254.28
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.54279999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 2347
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7978170149856143
          entropy_coeff: 0.009999999999999998
          kl: 0.01215549468257172
          policy_loss: -0.08187743690278795
          total_loss: -0.07953651001056035
          vf_explained_var: 0.5499986410140991
          vf_loss: 0.010319097329758935
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,635,27618,635000,-2.5428,-2.13,-3.13,254.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-25_00-11-56
  done: false
  episode_len_mean: 255.1
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.550999999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2351
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8611133330398135
          entropy_coeff: 0.009999999999999998
          kl: 0.00858435976960763
          policy_loss: -0.053006614330742094
          total_loss: -0.04944615686933199
          vf_explained_var: 0.48992758989334106
          vf_loss: 0.01217159049378501
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,636,27655.6,636000,-2.551,-2.13,-3.13,255.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-25_00-12-38
  done: false
  episode_len_mean: 255.57
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.555699999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2355
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7083538618352678
          entropy_coeff: 0.009999999999999998
          kl: 0.010872668964966989
          policy_loss: 0.01680309023294184
          total_loss: 0.022261132506860626
          vf_explained_var: 0.36812490224838257
          vf_loss: 0.012541580065670941
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 63700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,637,27697.7,637000,-2.5557,-2.13,-3.13,255.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-25_00-13-23
  done: false
  episode_len_mean: 255.94
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.559399999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2359
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7033700585365296
          entropy_coeff: 0.009999999999999998
          kl: 0.008285706542861407
          policy_loss: 0.017877248177925745
          total_loss: 0.023439743287033504
          vf_explained_var: 0.31692197918891907
          vf_loss: 0.012596193597548537
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 6380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,638,27741.9,638000,-2.5594,-2.13,-3.13,255.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-25_00-14-08
  done: false
  episode_len_mean: 256.11
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5610999999999895
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2363
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6229433662361569
          entropy_coeff: 0.009999999999999998
          kl: 0.007374405156533233
          policy_loss: 0.023452655225992203
          total_loss: 0.029777303917540444
          vf_explained_var: 0.19519293308258057
          vf_loss: 0.012554083795597156
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,639,27787.6,639000,-2.5611,-2.13,-3.13,256.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-25_00-14-51
  done: false
  episode_len_mean: 256.46
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.564599999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2367
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.987840838705313e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6719031261073218
          entropy_coeff: 0.009999999999999998
          kl: 0.004207806890564056
          policy_loss: 0.0303065816561381
          total_loss: 0.03642718030346764
          vf_explained_var: 0.1617593914270401
          vf_loss: 0.012839633712751998
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,640,27830.7,640000,-2.5646,-2.13,-3.13,256.46




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-25_00-15-55
  done: false
  episode_len_mean: 256.19
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.561899999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2371
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4939204193526565e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6000946872764163
          entropy_coeff: 0.009999999999999998
          kl: 0.005757132392106238
          policy_loss: -0.017452821963363223
          total_loss: -0.010486479269133674
          vf_explained_var: 0.2234450727701187
          vf_loss: 0.01296728861828645
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,641,27894.2,641000,-2.5619,-2.13,-3.13,256.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-25_00-16-40
  done: false
  episode_len_mean: 256.48
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5647999999999893
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2375
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4939204193526565e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6280116611056857
          entropy_coeff: 0.009999999999999998
          kl: 0.007452996392163808
          policy_loss: -0.06673233840200636
          total_loss: -0.059812251644002064
          vf_explained_var: 0.17788973450660706
          vf_loss: 0.013200204591784212
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,642,27939.3,642000,-2.5648,-2.13,-3.13,256.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-25_00-17-24
  done: false
  episode_len_mean: 256.99
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5698999999999894
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2379
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4939204193526565e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6934546185864343
          entropy_coeff: 0.009999999999999998
          kl: 0.008823195114101642
          policy_loss: -0.01903973701927397
          total_loss: -0.013151229669650396
          vf_explained_var: 0.19450926780700684
          vf_loss: 0.012823055249949296
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,643,27983.1,643000,-2.5699,-2.13,-3.13,256.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-25_00-18-03
  done: false
  episode_len_mean: 257.38
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5737999999999888
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2383
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4939204193526565e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6870665887991587
          entropy_coeff: 0.009999999999999998
          kl: 0.004982997798575125
          policy_loss: -0.013982154594527351
          total_loss: -0.008301326715283924
          vf_explained_var: 0.26580116152763367
          vf_loss: 0.012551493922041522
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,644,28022.5,644000,-2.5738,-2.13,-3.13,257.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-25_00-18-45
  done: false
  episode_len_mean: 258.05
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5804999999999887
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2387
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7962512075901031
          entropy_coeff: 0.009999999999999998
          kl: 0.008954481162613546
          policy_loss: 0.010821729236178928
          total_loss: 0.015266965246862836
          vf_explained_var: 0.26031625270843506
          vf_loss: 0.012407747449146377
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,645,28064.4,645000,-2.5805,-2.13,-3.13,258.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-25_00-19-28
  done: false
  episode_len_mean: 258.87
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5886999999999887
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2391
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7187874972820282
          entropy_coeff: 0.009999999999999998
          kl: 0.014720695026527942
          policy_loss: 0.02022454349531068
          total_loss: 0.02576003529959255
          vf_explained_var: 0.2567721903324127
          vf_loss: 0.012723365146666766
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 64600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,646,28107.2,646000,-2.5887,-2.13,-3.13,258.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-25_00-20-12
  done: false
  episode_len_mean: 259.45
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.594499999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 2395
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6319825688997904
          entropy_coeff: 0.009999999999999998
          kl: 0.007643269511389406
          policy_loss: 0.05307880789041519
          total_loss: 0.0575853082868788
          vf_explained_var: 0.21389544010162354
          vf_loss: 0.010826325768397914
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,647,28150.8,647000,-2.5945,-2.13,-3.13,259.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-25_00-20-53
  done: false
  episode_len_mean: 259.94
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.599399999999988
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 2398
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8307977504200406
          entropy_coeff: 0.009999999999999998
          kl: 0.013609358044259383
          policy_loss: -0.08808079403307703
          total_loss: -0.08458492987685734
          vf_explained_var: 0.3088882863521576
          vf_loss: 0.011803843494918611
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 6480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,648,28192.4,648000,-2.5994,-2.13,-3.13,259.94




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-25_00-21-56
  done: false
  episode_len_mean: 260.64
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6063999999999883
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 5
  episodes_total: 2403
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7024662438366148
          entropy_coeff: 0.009999999999999998
          kl: 0.01981097650692989
          policy_loss: -0.025021448896990883
          total_loss: -0.01564402340186967
          vf_explained_var: 0.3186355233192444
          vf_loss: 0.016402086046420867
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,649,28255.4,649000,-2.6064,-2.13,-3.5,260.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-25_00-22-39
  done: false
  episode_len_mean: 261.05
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.610499999999988
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2406
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2469602096763283e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9928629716237386
          entropy_coeff: 0.009999999999999998
          kl: 0.042399121633171354
          policy_loss: 0.062486041420035894
          total_loss: 0.06189716201689508
          vf_explained_var: 0.5747969150543213
          vf_loss: 0.009339750254164553
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 65000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,650,28298.5,650000,-2.6105,-2.13,-3.5,261.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-25_00-23-21
  done: false
  episode_len_mean: 262.24
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6223999999999874
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2410
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.000388001733356
          entropy_coeff: 0.009999999999999998
          kl: 0.012185374002999893
          policy_loss: 0.013821191837390263
          total_loss: 0.016847034874889585
          vf_explained_var: 0.31037473678588867
          vf_loss: 0.013029721441368262
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,651,28339.9,651000,-2.6224,-2.13,-3.5,262.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-25_00-24-02
  done: false
  episode_len_mean: 262.77
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6276999999999875
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2413
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0167461733023326
          entropy_coeff: 0.009999999999999998
          kl: 0.015946010255103716
          policy_loss: -0.09168604769640498
          total_loss: -0.08898306878076659
          vf_explained_var: 0.38982635736465454
          vf_loss: 0.012870438531455067
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,652,28380.9,652000,-2.6277,-2.13,-3.5,262.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-25_00-24-40
  done: false
  episode_len_mean: 264.24
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6423999999999874
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2417
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2218410982025993
          entropy_coeff: 0.009999999999999998
          kl: 0.011313928901771141
          policy_loss: 0.01852574294639958
          total_loss: 0.018224579261408913
          vf_explained_var: 0.5230410695075989
          vf_loss: 0.011917244994805919
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 6530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,653,28418.7,653000,-2.6424,-2.13,-3.5,264.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-25_00-25-22
  done: false
  episode_len_mean: 264.78
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6477999999999873
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2421
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0240388082133398
          entropy_coeff: 0.009999999999999998
          kl: 0.010633856263464135
          policy_loss: -0.01296038602789243
          total_loss: -0.011327494266960355
          vf_explained_var: 0.4708385467529297
          vf_loss: 0.01187327845642964
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,654,28461.3,654000,-2.6478,-2.13,-3.5,264.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-25_00-26-06
  done: false
  episode_len_mean: 264.26
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.642599999999988
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2424
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7266831152968937
          entropy_coeff: 0.009999999999999998
          kl: 0.011399544803937386
          policy_loss: -0.1174802877008915
          total_loss: -0.11289448771211837
          vf_explained_var: 0.366782009601593
          vf_loss: 0.011852628851516379
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,655,28505.2,655000,-2.6426,-2.13,-3.5,264.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-25_00-26-50
  done: false
  episode_len_mean: 263.61
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6360999999999883
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2428
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6982635080814361
          entropy_coeff: 0.009999999999999998
          kl: 0.010615504288249393
          policy_loss: -0.10888855482141177
          total_loss: -0.10203235985504257
          vf_explained_var: 0.3050629794597626
          vf_loss: 0.013838830331547394
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,656,28548.9,656000,-2.6361,-2.13,-3.5,263.61




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-25_00-27-54
  done: false
  episode_len_mean: 261.85
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6184999999999876
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 5
  episodes_total: 2433
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6197890732023451
          entropy_coeff: 0.009999999999999998
          kl: 0.010245179410732734
          policy_loss: -0.03425726965069771
          total_loss: -0.025024390717347463
          vf_explained_var: 0.21736547350883484
          vf_loss: 0.015430768651680814
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,657,28612.6,657000,-2.6185,-2.13,-3.5,261.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-25_00-28-38
  done: false
  episode_len_mean: 260.23
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.602299999999988
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2437
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5638083808951908
          entropy_coeff: 0.009999999999999998
          kl: 0.008433887949229302
          policy_loss: -0.0003064339359601339
          total_loss: 0.00567450291580624
          vf_explained_var: 0.1812300831079483
          vf_loss: 0.011619020036111276
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,658,28656.7,658000,-2.6023,-2.13,-3.5,260.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-25_00-29-21
  done: false
  episode_len_mean: 259.4
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5939999999999888
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2440
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5685978578196631
          entropy_coeff: 0.009999999999999998
          kl: 0.006033082348065477
          policy_loss: -0.10262742191553116
          total_loss: -0.09625592662228478
          vf_explained_var: 0.12116324156522751
          vf_loss: 0.012057471968647506
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,659,28699.4,659000,-2.594,-2.13,-3.5,259.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-25_00-30-03
  done: false
  episode_len_mean: 258.08
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.580799999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2444
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.525416738126013
          entropy_coeff: 0.009999999999999998
          kl: 0.014352336344465715
          policy_loss: -0.10899761468172073
          total_loss: -0.0997691692577468
          vf_explained_var: 0.12582716345787048
          vf_loss: 0.014482612618141703
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 66000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,660,28741.7,660000,-2.5808,-2.13,-3.5,258.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-25_00-30-48
  done: false
  episode_len_mean: 257.14
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.571399999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 5
  episodes_total: 2449
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8704403145144921e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5189806852075789
          entropy_coeff: 0.009999999999999998
          kl: 0.0038236096111581397
          policy_loss: -0.019979102743996516
          total_loss: -0.0094874683353636
          vf_explained_var: 0.1183108314871788
          vf_loss: 0.015681439058648217
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,661,28787,661000,-2.5714,-2.13,-3.5,257.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-25_00-31-30
  done: false
  episode_len_mean: 256.9
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.568999999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2452
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.352201572572461e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5208449814054701
          entropy_coeff: 0.009999999999999998
          kl: 0.009544277659123187
          policy_loss: -0.10569502769245041
          total_loss: -0.09831553250551224
          vf_explained_var: 0.12878867983818054
          vf_loss: 0.012587945173597999
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 66200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,662,28828.3,662000,-2.569,-2.13,-3.5,256.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-25_00-32-16
  done: false
  episode_len_mean: 256.07
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.560699999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 5
  episodes_total: 2457
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.352201572572461e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.45477507842911613
          entropy_coeff: 0.009999999999999998
          kl: 0.0039009027777477624
          policy_loss: -0.0197592051492797
          total_loss: -0.00822022416525417
          vf_explained_var: 0.12331392616033554
          vf_loss: 0.016086735245254304
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,663,28874.7,663000,-2.5607,-2.13,-3.5,256.07




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-25_00-33-20
  done: false
  episode_len_mean: 255.73
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.557299999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2461
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6761007862862304e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.450309775935279
          entropy_coeff: 0.009999999999999998
          kl: 0.0029280929289645504
          policy_loss: 0.014567881325880687
          total_loss: 0.02278234428829617
          vf_explained_var: 0.1357765793800354
          vf_loss: 0.012717558888511525
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,664,28938.4,664000,-2.5573,-2.1,-3.5,255.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-25_00-34-06
  done: false
  episode_len_mean: 255.44
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5543999999999896
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2465
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3380503931431152e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.45622413655122124
          entropy_coeff: 0.009999999999999998
          kl: 0.0018918098565519712
          policy_loss: 0.02814448169536061
          total_loss: 0.03665979247954157
          vf_explained_var: 0.123651422560215
          vf_loss: 0.013077553340958224
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 66500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,665,28984.3,665000,-2.5544,-2.1,-3.5,255.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-25_00-34-51
  done: false
  episode_len_mean: 255.13
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5512999999999892
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2469
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1690251965715576e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.3881102051999834
          entropy_coeff: 0.009999999999999998
          kl: 0.00945094062736267
          policy_loss: 0.017053086310625076
          total_loss: 0.026129983365535736
          vf_explained_var: 0.11403331905603409
          vf_loss: 0.012957998882565233
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 6660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,666,29029.7,666000,-2.5513,-2.1,-3.5,255.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-25_00-35-37
  done: false
  episode_len_mean: 255.36
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.553599999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2473
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1690251965715576e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4602536850505405
          entropy_coeff: 0.009999999999999998
          kl: 0.012552214432844662
          policy_loss: 0.044935437540213265
          total_loss: 0.05163694777422481
          vf_explained_var: 0.06933574378490448
          vf_loss: 0.011304046886248722
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 66700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,667,29075.2,667000,-2.5536,-2.1,-3.5,255.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-25_00-36-20
  done: false
  episode_len_mean: 255.13
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5512999999999897
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2477
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1690251965715576e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.49709428747495016
          entropy_coeff: 0.009999999999999998
          kl: 0.056653102709684725
          policy_loss: -0.010071476341949569
          total_loss: -0.0017941056026352777
          vf_explained_var: 0.10912956297397614
          vf_loss: 0.013248313849584924
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,668,29118.4,668000,-2.5513,-2.1,-3.5,255.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-25_00-37-01
  done: false
  episode_len_mean: 255.78
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.557799999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2481
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.753537794857336e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6221138325002459
          entropy_coeff: 0.009999999999999998
          kl: 0.020117496870971627
          policy_loss: 0.01920372491909398
          total_loss: 0.026226061085859936
          vf_explained_var: 0.13733698427677155
          vf_loss: 0.013243471779343154
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,669,29159.8,669000,-2.5578,-2.1,-3.5,255.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-25_00-37-37
  done: false
  episode_len_mean: 257.12
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.571199999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2484
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6303066922860046e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8317405409283108
          entropy_coeff: 0.009999999999999998
          kl: 0.03966725560099289
          policy_loss: -0.09439508790771166
          total_loss: -0.08918505054381158
          vf_explained_var: 0.22277867794036865
          vf_loss: 0.013527442132019334
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 67000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,670,29196,670000,-2.5712,-2.1,-3.5,257.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-25_00-38-16
  done: false
  episode_len_mean: 258.06
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5805999999999893
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2488
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.945460038429008e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.1284154309166803
          entropy_coeff: 0.009999999999999998
          kl: 0.04236008961418805
          policy_loss: -0.01829484324488375
          total_loss: -0.018653306447797353
          vf_explained_var: 0.4734240770339966
          vf_loss: 0.010925691564463907
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 67100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,671,29235,671000,-2.5806,-2.1,-3.5,258.06




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-25_00-39-17
  done: false
  episode_len_mean: 258.67
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.586699999999989
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2491
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.918190057643509e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2386419905556574
          entropy_coeff: 0.009999999999999998
          kl: 0.015057973012933622
          policy_loss: -0.06859811941782633
          total_loss: -0.07112254980537626
          vf_explained_var: 0.6468801498413086
          vf_loss: 0.009861985433639751
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,672,29295.3,672000,-2.5867,-2.1,-3.5,258.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-25_00-39-58
  done: false
  episode_len_mean: 259.62
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5961999999999885
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2495
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.918190057643509e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2985362529754638
          entropy_coeff: 0.009999999999999998
          kl: 0.052338084890348885
          policy_loss: 0.03414096451467938
          total_loss: 0.029513500051365958
          vf_explained_var: 0.6587419509887695
          vf_loss: 0.008357894976830317
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,673,29336.3,673000,-2.5962,-2.1,-3.5,259.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-25_00-40-40
  done: false
  episode_len_mean: 259.23
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5922999999999887
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2499
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.877285086465268e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2350787281990052
          entropy_coeff: 0.009999999999999998
          kl: 0.015877373823728964
          policy_loss: -0.0222760655813747
          total_loss: -0.0244722719821665
          vf_explained_var: 0.6729801893234253
          vf_loss: 0.010154576734122303
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,674,29378.9,674000,-2.5923,-2.1,-3.36,259.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-25_00-41-21
  done: false
  episode_len_mean: 260.49
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6048999999999882
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 2502
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.877285086465268e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.248545613553789
          entropy_coeff: 0.009999999999999998
          kl: 0.018133568759299043
          policy_loss: 0.02902231216430664
          total_loss: 0.024095112747616238
          vf_explained_var: 0.737872838973999
          vf_loss: 0.007558255984137455
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,675,29419.9,675000,-2.6049,-2.1,-3.36,260.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-25_00-41-59
  done: false
  episode_len_mean: 261.99
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6198999999999883
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2505
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.877285086465268e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7374330242474874
          entropy_coeff: 0.009999999999999998
          kl: 0.05104684058785877
          policy_loss: -0.13228582565983135
          total_loss: -0.13576322396596271
          vf_explained_var: 0.6267150640487671
          vf_loss: 0.01389693251500527
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,676,29457.3,676000,-2.6199,-2.1,-3.61,261.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-25_00-42-42
  done: false
  episode_len_mean: 261.56
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6155999999999877
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2509
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.923684134748247
          entropy_coeff: 0.009999999999999998
          kl: 0.013972779185672992
          policy_loss: 0.009464609084857835
          total_loss: 0.009857020527124404
          vf_explained_var: 0.6047163009643555
          vf_loss: 0.00962925377405352
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,677,29500.7,677000,-2.6156,-2.1,-3.61,261.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-25_00-43-24
  done: false
  episode_len_mean: 261.39
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6138999999999886
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2513
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9668494502703349
          entropy_coeff: 0.009999999999999998
          kl: 0.008372857622506252
          policy_loss: 0.033771563776665264
          total_loss: 0.034756391454074116
          vf_explained_var: 0.49257516860961914
          vf_loss: 0.010653323731902573
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 6780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,678,29542.2,678000,-2.6139,-2.1,-3.61,261.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-25_00-44-03
  done: false
  episode_len_mean: 260.81
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.608099999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2517
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8997800025675032
          entropy_coeff: 0.009999999999999998
          kl: 0.011726329547589663
          policy_loss: -0.008029522746801377
          total_loss: -0.006011683659421073
          vf_explained_var: 0.3792878985404968
          vf_loss: 0.011015638036446439
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 6790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,679,29581.7,679000,-2.6081,-2.1,-3.61,260.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-25_00-45-06
  done: false
  episode_len_mean: 260.01
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.600099999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2521
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7759017129739125
          entropy_coeff: 0.009999999999999998
          kl: 0.006572023624129584
          policy_loss: -0.005849598017003801
          total_loss: -0.0020662700136502583
          vf_explained_var: 0.28743645548820496
          vf_loss: 0.01154234628710482
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,680,29643.8,680000,-2.6001,-2.1,-3.61,260.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-25_00-45-49
  done: false
  episode_len_mean: 259.94
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5993999999999886
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2525
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7498703499635061
          entropy_coeff: 0.009999999999999998
          kl: 0.007516345065871501
          policy_loss: -0.012798617780208587
          total_loss: -0.008446269896295336
          vf_explained_var: 0.19632494449615479
          vf_loss: 0.011851052815715472
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,681,29687,681000,-2.5994,-2.1,-3.61,259.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-25_00-46-32
  done: false
  episode_len_mean: 260.08
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.600799999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2528
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7267403331067827
          entropy_coeff: 0.009999999999999998
          kl: 0.005233766779589634
          policy_loss: -0.045771049873696436
          total_loss: -0.042939856482876675
          vf_explained_var: 0.18003372848033905
          vf_loss: 0.010098600113350484
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,682,29730,682000,-2.6008,-2.1,-3.61,260.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-25_00-47-14
  done: false
  episode_len_mean: 260.93
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.609299999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2532
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7361171305179596
          entropy_coeff: 0.009999999999999998
          kl: 0.008097211821591897
          policy_loss: 0.00033238414261076187
          total_loss: 0.005829686257574293
          vf_explained_var: 0.14299719035625458
          vf_loss: 0.012858473530246151
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,683,29772,683000,-2.6093,-2.1,-3.61,260.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-25_00-47-56
  done: false
  episode_len_mean: 261.21
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6120999999999883
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2536
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3315927629697901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.566793891456392
          entropy_coeff: 0.009999999999999998
          kl: 0.004079971234684265
          policy_loss: 0.007909298770957522
          total_loss: 0.015536094870832231
          vf_explained_var: 0.1139838770031929
          vf_loss: 0.013294736109673976
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,684,29814.4,684000,-2.6121,-2.1,-3.61,261.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-25_00-48-42
  done: false
  episode_len_mean: 261.08
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6107999999999874
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2540
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.657963814848951e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5954680840174357
          entropy_coeff: 0.009999999999999998
          kl: 0.004989127286824808
          policy_loss: 0.03155811097886827
          total_loss: 0.03769155343373617
          vf_explained_var: 0.10518905520439148
          vf_loss: 0.012088122581028276
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,685,29859.9,685000,-2.6108,-2.1,-3.61,261.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-25_00-49-26
  done: false
  episode_len_mean: 261.0
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.609999999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2544
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3289819074244754e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7999918984042274
          entropy_coeff: 0.009999999999999998
          kl: 0.055086368364346605
          policy_loss: 0.02082303124997351
          total_loss: 0.025139906340175204
          vf_explained_var: 0.10702848434448242
          vf_loss: 0.012316795945581462
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 686000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,686,29903.8,686000,-2.61,-2.1,-3.61,261


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-25_00-50-02
  done: false
  episode_len_mean: 262.49
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.624899999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2547
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0044349683655633
          entropy_coeff: 0.009999999999999998
          kl: 0.013591486290771273
          policy_loss: -0.09370343163609504
          total_loss: -0.08970581135816044
          vf_explained_var: 0.05102333053946495
          vf_loss: 0.01404196945950389
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,687,29940.4,687000,-2.6249,-2.1,-3.61,262.49




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-25_00-50-58
  done: false
  episode_len_mean: 263.88
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.638799999999988
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2551
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9253517640961542
          entropy_coeff: 0.009999999999999998
          kl: 0.007113114534077328
          policy_loss: 0.014441587196456062
          total_loss: 0.018560759102304775
          vf_explained_var: 0.1304926574230194
          vf_loss: 0.013372688067870007
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,688,29996.5,688000,-2.6388,-2.1,-3.61,263.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-25_00-51-38
  done: false
  episode_len_mean: 264.73
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6472999999999884
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2554
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8473184155093298
          entropy_coeff: 0.009999999999999998
          kl: 0.011265728667956568
          policy_loss: -0.11314421263005998
          total_loss: -0.1077766615483496
          vf_explained_var: 0.1023111566901207
          vf_loss: 0.013840735776142941
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,689,30036.4,689000,-2.6473,-2.1,-3.61,264.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-25_00-52-18
  done: false
  episode_len_mean: 266.05
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6604999999999874
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2558
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7409016609191894
          entropy_coeff: 0.009999999999999998
          kl: 0.007910303265431493
          policy_loss: 0.031166550517082215
          total_loss: 0.03582458347082138
          vf_explained_var: 0.09421275556087494
          vf_loss: 0.01206705289789372
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,690,30076.4,690000,-2.6605,-2.1,-3.61,266.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-25_00-53-00
  done: false
  episode_len_mean: 267.5
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.674999999999987
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2562
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8032829364140829
          entropy_coeff: 0.009999999999999998
          kl: 0.005929295378343902
          policy_loss: 0.023043519796596634
          total_loss: 0.02928658864564366
          vf_explained_var: 0.11683320254087448
          vf_loss: 0.014275898256649574
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,691,30117.7,691000,-2.675,-2.34,-3.61,267.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-25_00-53-42
  done: false
  episode_len_mean: 268.62
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.6861999999999866
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2565
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6837323943773905
          entropy_coeff: 0.009999999999999998
          kl: 0.009190691876798003
          policy_loss: -0.08441657523314158
          total_loss: -0.07823314277662172
          vf_explained_var: 0.07186964899301529
          vf_loss: 0.013020753550032774
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 69200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,692,30159.5,692000,-2.6862,-2.34,-3.61,268.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-25_00-54-23
  done: false
  episode_len_mean: 269.47
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.694699999999986
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2569
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6974498556719886
          entropy_coeff: 0.009999999999999998
          kl: 0.0063362973544945096
          policy_loss: -0.015823527011606428
          total_loss: -0.007829165293110741
          vf_explained_var: 0.09375334531068802
          vf_loss: 0.014968858431610796
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,693,30200.5,693000,-2.6947,-2.34,-3.61,269.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-25_00-55-05
  done: false
  episode_len_mean: 269.96
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.699599999999987
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2573
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7605866405698988
          entropy_coeff: 0.009999999999999998
          kl: 0.008489513699722985
          policy_loss: 0.016568375709984037
          total_loss: 0.023496901988983153
          vf_explained_var: 0.13070592284202576
          vf_loss: 0.014534393780761296
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,694,30242.6,694000,-2.6996,-2.34,-3.61,269.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-25_00-55-48
  done: false
  episode_len_mean: 270.71
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.707099999999987
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2577
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8816482755872939
          entropy_coeff: 0.009999999999999998
          kl: 0.013816680514182167
          policy_loss: 0.00993021031220754
          total_loss: 0.015476736591921912
          vf_explained_var: 0.14731566607952118
          vf_loss: 0.014363009358445804
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,695,30285.9,695000,-2.7071,-2.34,-3.61,270.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-25_00-56-48
  done: false
  episode_len_mean: 270.63
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7062999999999864
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2581
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.993472861136713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0025754186842177
          entropy_coeff: 0.009999999999999998
          kl: 0.022307411383538225
          policy_loss: -0.012040519217650095
          total_loss: -0.00709651294681761
          vf_explained_var: 0.19393394887447357
          vf_loss: 0.014969760438220368
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 6960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,696,30345.8,696000,-2.7063,-2.34,-3.61,270.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-25_00-57-28
  done: false
  episode_len_mean: 270.75
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7074999999999863
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2584
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.49020929170507e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.1784424960613251
          entropy_coeff: 0.009999999999999998
          kl: 0.014287339292796573
          policy_loss: 0.043207608825630614
          total_loss: 0.041736829280853274
          vf_explained_var: 0.19123005867004395
          vf_loss: 0.010313644412154746
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,697,30385.8,697000,-2.7075,-2.34,-3.61,270.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-25_00-58-10
  done: false
  episode_len_mean: 270.11
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7010999999999865
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2588
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.49020929170507e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0221198366747961
          entropy_coeff: 0.009999999999999998
          kl: 0.019862369244264065
          policy_loss: 0.04622086129254765
          total_loss: 0.04850880056619644
          vf_explained_var: 0.43405529856681824
          vf_loss: 0.012509139492693874
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,698,30428.2,698000,-2.7011,-2.34,-3.61,270.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-25_00-58-55
  done: false
  episode_len_mean: 269.64
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.6963999999999864
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2592
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.49020929170507e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0077254725827112
          entropy_coeff: 0.009999999999999998
          kl: 0.014840271950716912
          policy_loss: -0.02527663658062617
          total_loss: -0.02367365186413129
          vf_explained_var: 0.4777695834636688
          vf_loss: 0.011680240561771724
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,699,30472.3,699000,-2.6964,-2.34,-3.61,269.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-25_00-59-35
  done: false
  episode_len_mean: 270.05
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7004999999999866
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2595
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.49020929170507e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.4226285305288102
          entropy_coeff: 0.009999999999999998
          kl: 0.03897966309605915
          policy_loss: 0.025011885911226273
          total_loss: 0.020432594998015297
          vf_explained_var: 0.5293310284614563
          vf_loss: 0.009646993534018596
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,700,30512.3,700000,-2.7005,-2.34,-3.61,270.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-25_01-00-17
  done: false
  episode_len_mean: 270.28
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7027999999999857
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 2599
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1235313937557601e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3501840637789833
          entropy_coeff: 0.009999999999999998
          kl: 0.041192103775941964
          policy_loss: -0.019586269888612957
          total_loss: -0.02339202794763777
          vf_explained_var: 0.6232604384422302
          vf_loss: 0.009696082002483309
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 7010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,701,30554.6,701000,-2.7028,-2.34,-3.61,270.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-25_01-01-01
  done: false
  episode_len_mean: 270.33
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.7032999999999863
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 2602
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.685297090633641e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6560763676961263
          entropy_coeff: 0.009999999999999998
          kl: 0.038573627739865245
          policy_loss: 0.016589390734831493
          total_loss: 0.007387498352262709
          vf_explained_var: 0.6323909163475037
          vf_loss: 0.007358872580678306
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,702,30598.3,702000,-2.7033,-2.34,-3.61,270.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-25_01-01-46
  done: false
  episode_len_mean: 268.29
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.6828999999999867
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2606
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5279456359504607e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9717698865466647
          entropy_coeff: 0.009999999999999998
          kl: 0.009459762426636095
          policy_loss: 0.0025180947449472213
          total_loss: 0.004809522670176294
          vf_explained_var: 0.470005065202713
          vf_loss: 0.01200912692066696
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,703,30643.8,703000,-2.6829,-2.34,-3.38,268.29




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-25_01-02-48
  done: false
  episode_len_mean: 267.64
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.676399999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2610
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5279456359504607e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.205496291981803
          entropy_coeff: 0.009999999999999998
          kl: 0.023660322167634244
          policy_loss: -0.011923075053426954
          total_loss: -0.01328923896782928
          vf_explained_var: 0.4756399393081665
          vf_loss: 0.010688796608398358
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 70400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,704,30705.6,704000,-2.6764,-2.19,-3.38,267.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-25_01-03-32
  done: false
  episode_len_mean: 267.47
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.674699999999986
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2614
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7919184539256907e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2627342753940158
          entropy_coeff: 0.009999999999999998
          kl: 0.028027116882746998
          policy_loss: 0.001038455218076706
          total_loss: 0.00011452875203556484
          vf_explained_var: 0.42995485663414
          vf_loss: 0.011703414159516494
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 70500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,705,30750,705000,-2.6747,-2.19,-3.38,267.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-25_01-04-14
  done: false
  episode_len_mean: 267.84
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6783999999999866
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2617
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.687877680888536e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2955728689829509
          entropy_coeff: 0.009999999999999998
          kl: 0.02361587088231533
          policy_loss: -0.0853288428650962
          total_loss: -0.08693353881438573
          vf_explained_var: 0.3071049749851227
          vf_loss: 0.011351035421507226
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,706,30791.7,706000,-2.6784,-2.19,-3.38,267.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-25_01-04-55
  done: false
  episode_len_mean: 268.75
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6874999999999862
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2621
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3325030499034458
          entropy_coeff: 0.009999999999999998
          kl: 0.017022556341004996
          policy_loss: 0.02134917750954628
          total_loss: 0.018692447162336773
          vf_explained_var: 0.3613959848880768
          vf_loss: 0.010668298064006699
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,707,30832.2,707000,-2.6875,-2.19,-3.38,268.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-25_01-05-35
  done: false
  episode_len_mean: 269.79
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6978999999999864
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2625
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2127934442626105
          entropy_coeff: 0.009999999999999998
          kl: 0.015415624399767264
          policy_loss: -0.017236814523736637
          total_loss: -0.018377512320876123
          vf_explained_var: 0.32038429379463196
          vf_loss: 0.010987235905809535
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,708,30872.6,708000,-2.6979,-2.19,-3.38,269.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-25_01-06-17
  done: false
  episode_len_mean: 270.01
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.700099999999986
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2628
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0086788839764065
          entropy_coeff: 0.009999999999999998
          kl: 0.00860204405389761
          policy_loss: -0.03917613484793239
          total_loss: -0.03953861470023791
          vf_explained_var: 0.21403050422668457
          vf_loss: 0.009724309145369463
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,709,30914.1,709000,-2.7001,-2.19,-3.38,270.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-25_01-06-56
  done: false
  episode_len_mean: 270.45
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7044999999999857
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2632
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1641537957721286
          entropy_coeff: 0.009999999999999998
          kl: 0.008103431025804476
          policy_loss: 0.01495500538084242
          total_loss: 0.016191219662626585
          vf_explained_var: 0.21338912844657898
          vf_loss: 0.012877753749489785
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 71000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,710,30953.4,710000,-2.7045,-2.19,-3.38,270.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-25_01-07-38
  done: false
  episode_len_mean: 270.98
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7097999999999867
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2636
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9480960064464146
          entropy_coeff: 0.009999999999999998
          kl: 0.011536023881149375
          policy_loss: 0.02595018098751704
          total_loss: 0.027350989894734487
          vf_explained_var: 0.2607789933681488
          vf_loss: 0.010881767334002588
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,711,30995.3,711000,-2.7098,-2.19,-3.38,270.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-25_01-08-17
  done: false
  episode_len_mean: 271.71
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7170999999999865
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2639
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0969982279671564
          entropy_coeff: 0.009999999999999998
          kl: 0.019806571729549916
          policy_loss: -0.05818544170922703
          total_loss: -0.061443127112256156
          vf_explained_var: 0.6082972884178162
          vf_loss: 0.007712296011029846
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 7120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,712,31034.6,712000,-2.7171,-2.19,-3.38,271.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-25_01-09-19
  done: false
  episode_len_mean: 271.64
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.716399999999985
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2643
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6273865958054861
          entropy_coeff: 0.009999999999999998
          kl: 0.00726350444578985
          policy_loss: -0.06640678739382161
          total_loss: -0.06555326581001282
          vf_explained_var: 0.7007670402526855
          vf_loss: 0.0071273883959899345
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,713,31096.7,713000,-2.7164,-2.19,-3.38,271.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-25_01-10-03
  done: false
  episode_len_mean: 270.59
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7058999999999855
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2647
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8308061275217268
          entropy_coeff: 0.009999999999999998
          kl: 0.006892281272553862
          policy_loss: -0.0688445224530167
          total_loss: -0.06856918128000365
          vf_explained_var: 0.5972372889518738
          vf_loss: 0.008583402990673979
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,714,31140.5,714000,-2.7059,-2.19,-3.38,270.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-25_01-10-49
  done: false
  episode_len_mean: 268.97
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6896999999999864
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2651
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.531816521332808e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7182261811362373
          entropy_coeff: 0.009999999999999998
          kl: 0.05017379829946075
          policy_loss: -0.06397865356670486
          total_loss: -0.06340424145261446
          vf_explained_var: 0.5968433618545532
          vf_loss: 0.007756670869679914
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,715,31186.1,715000,-2.6897,-2.19,-3.38,268.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-25_01-11-34
  done: false
  episode_len_mean: 266.6
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.665999999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 5
  episodes_total: 2656
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2797724781999205e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.41663697726196713
          entropy_coeff: 0.009999999999999998
          kl: 0.018963740886508006
          policy_loss: -0.034067979206641515
          total_loss: -0.029779491697748503
          vf_explained_var: 0.602664053440094
          vf_loss: 0.008454857761454252
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 7160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,716,31231.6,716000,-2.666,-2.19,-3.38,266.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-25_01-12-19
  done: false
  episode_len_mean: 265.25
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6524999999999874
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2660
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2797724781999205e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6573771523104773
          entropy_coeff: 0.009999999999999998
          kl: 0.10412082300389147
          policy_loss: 0.006877119508054521
          total_loss: 0.00847643299235238
          vf_explained_var: 0.4565597176551819
          vf_loss: 0.008173085516318679
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,717,31275.9,717000,-2.6525,-2.19,-3.38,265.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-25_01-12-55
  done: false
  episode_len_mean: 266.19
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.661899999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2663
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9196587172998815e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9004442691802979
          entropy_coeff: 0.009999999999999998
          kl: 0.09790164240651648
          policy_loss: 0.03495579717887772
          total_loss: 0.03398500242167049
          vf_explained_var: 0.4174855649471283
          vf_loss: 0.008033650073533257
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 718000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,718,31312.3,718000,-2.6619,-2.19,-3.38,266.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-25_01-13-34
  done: false
  episode_len_mean: 266.98
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.669799999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2667
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8794880759498223e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.910958409971661
          entropy_coeff: 0.009999999999999998
          kl: 0.023216708645822356
          policy_loss: -0.010523785485161676
          total_loss: -0.008060050341818068
          vf_explained_var: 0.1905791461467743
          vf_loss: 0.01157331724340717
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 71900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,719,31351.1,719000,-2.6698,-2.19,-3.38,266.98




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-25_01-14-32
  done: false
  episode_len_mean: 266.77
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.6676999999999875
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2670
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.319232113924733e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7410428881645202
          entropy_coeff: 0.009999999999999998
          kl: 0.023931784306904864
          policy_loss: -0.14792355050643285
          total_loss: -0.14588279094960954
          vf_explained_var: 0.3374169170856476
          vf_loss: 0.009451186217160689
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 72000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,720,31409.3,720000,-2.6677,-2.12,-3.38,266.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-25_01-15-15
  done: false
  episode_len_mean: 266.74
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.667399999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2674
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.478848170887101e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8272703594631619
          entropy_coeff: 0.009999999999999998
          kl: 0.018503459234282045
          policy_loss: -0.03651496171951294
          total_loss: -0.032782078948285845
          vf_explained_var: 0.10267437994480133
          vf_loss: 0.012005588474373023
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 7210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,721,31452.6,721000,-2.6674,-2.12,-3.38,266.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-25_01-16-00
  done: false
  episode_len_mean: 266.43
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.664299999999987
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2678
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.478848170887101e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.049833635489146
          entropy_coeff: 0.009999999999999998
          kl: 0.0651670594415661
          policy_loss: -0.0633363491959042
          total_loss: -0.06338177290227678
          vf_explained_var: 0.17504630982875824
          vf_loss: 0.010452911951061752
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,722,31497.1,722000,-2.6643,-2.12,-3.38,266.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-25_01-16-40
  done: false
  episode_len_mean: 266.83
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.6682999999999866
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 4
  episodes_total: 2682
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.71827225633065e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2897661632961697
          entropy_coeff: 0.009999999999999998
          kl: 0.01710927029415177
          policy_loss: 0.013985823260413276
          total_loss: 0.01114892851975229
          vf_explained_var: 0.1990385502576828
          vf_loss: 0.01006076776733001
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,723,31537,723000,-2.6683,-2.12,-3.38,266.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-25_01-17-18
  done: false
  episode_len_mean: 267.07
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.6706999999999868
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2685
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.71827225633065e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4138123432795207
          entropy_coeff: 0.009999999999999998
          kl: 0.040265470646514485
          policy_loss: 0.03271379768848419
          total_loss: 0.02634567850165897
          vf_explained_var: 0.06621697545051575
          vf_loss: 0.0077700063961351085
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,724,31574.8,724000,-2.6707,-2.12,-3.38,267.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-25_01-17-54
  done: false
  episode_len_mean: 268.1
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.6809999999999867
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2688
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4577408384495977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1700243910153707
          entropy_coeff: 0.009999999999999998
          kl: 0.03151713023579358
          policy_loss: -0.1325542455746068
          total_loss: -0.13495926699704594
          vf_explained_var: 0.3536711037158966
          vf_loss: 0.009295223880973128
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,725,31611.4,725000,-2.681,-2.12,-3.38,268.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-25_01-18-23
  done: false
  episode_len_mean: 271.05
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7104999999999864
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2691
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1866112576743964e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0565520690547094
          entropy_coeff: 0.009999999999999998
          kl: 0.02429964052781861
          policy_loss: -0.004188970641957389
          total_loss: -0.007507855610715018
          vf_explained_var: 0.2901860475540161
          vf_loss: 0.007246642858566095
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,726,31639.8,726000,-2.7105,-2.12,-3.97,271.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-25_01-18-57
  done: false
  episode_len_mean: 273.4
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7339999999999858
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2694
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.279916886511594e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2538887951109143
          entropy_coeff: 0.009999999999999998
          kl: 0.01463061819909309
          policy_loss: 0.03588058236572478
          total_loss: 0.03284508511424065
          vf_explained_var: 0.21531711518764496
          vf_loss: 0.009503390620617816
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,727,31674.2,727000,-2.734,-2.12,-3.97,273.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-25_01-19-32
  done: false
  episode_len_mean: 274.08
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7407999999999855
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2697
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.279916886511594e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3496983819537693
          entropy_coeff: 0.009999999999999998
          kl: 0.013568217565070897
          policy_loss: 0.02247613767782847
          total_loss: 0.018204010443554984
          vf_explained_var: 0.10041104257106781
          vf_loss: 0.009224855012467338
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 7280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,728,31708.6,728000,-2.7408,-2.12,-3.97,274.08




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-25_01-20-24
  done: false
  episode_len_mean: 274.78
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7477999999999856
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2700
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.279916886511594e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1951929516262478
          entropy_coeff: 0.009999999999999998
          kl: 0.014019321583112622
          policy_loss: -0.0026153869926929473
          total_loss: -0.004786493380864462
          vf_explained_var: 0.05700932443141937
          vf_loss: 0.009780822285554476
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,729,31761.2,729000,-2.7478,-2.12,-3.97,274.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-25_01-21-00
  done: false
  episode_len_mean: 276.82
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7681999999999842
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2703
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.279916886511594e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.253206111325158
          entropy_coeff: 0.009999999999999998
          kl: 0.027944110928022268
          policy_loss: -0.07226473987102508
          total_loss: -0.0707094571656651
          vf_explained_var: 0.04295537993311882
          vf_loss: 0.014087344261093272
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 73000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,730,31796.4,730000,-2.7682,-2.12,-3.97,276.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-25_01-21-41
  done: false
  episode_len_mean: 278.0
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7799999999999843
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2707
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.919875329767391e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9951623413297865
          entropy_coeff: 0.009999999999999998
          kl: 0.06861291421803202
          policy_loss: -0.012116394088500076
          total_loss: -0.010021657269034121
          vf_explained_var: 0.3102589547634125
          vf_loss: 0.01204635927764078
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 73100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,731,31838.1,731000,-2.78,-2.12,-3.97,278


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-25_01-22-14
  done: false
  episode_len_mean: 280.39
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.803899999999983
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2710
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.379812994651084e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8634706007109748
          entropy_coeff: 0.009999999999999998
          kl: 0.01420963355498404
          policy_loss: 0.04631047265397178
          total_loss: 0.045618512729803724
          vf_explained_var: 0.49417805671691895
          vf_loss: 0.007942742758314126
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,732,31870.9,732000,-2.8039,-2.12,-3.97,280.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-25_01-22-47
  done: false
  episode_len_mean: 282.87
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.8286999999999836
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2713
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.379812994651084e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9322513957818349
          entropy_coeff: 0.009999999999999998
          kl: 0.06654377246106644
          policy_loss: 0.022487015442715752
          total_loss: 0.019978091451856826
          vf_explained_var: 0.6402332186698914
          vf_loss: 0.006813588549589945
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 73300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,733,31904.3,733000,-2.8287,-2.12,-3.97,282.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-25_01-23-22
  done: false
  episode_len_mean: 283.93
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.839299999999983
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2716
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1069719491976624e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0469819128513336
          entropy_coeff: 0.009999999999999998
          kl: 0.12038615050365832
          policy_loss: 0.04950336946381463
          total_loss: 0.04520761999819014
          vf_explained_var: 0.5416909456253052
          vf_loss: 0.006174057429759867
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,734,31939.2,734000,-2.8393,-2.12,-3.97,283.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-25_01-23-47
  done: false
  episode_len_mean: 288.15
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.881499999999982
  episode_reward_min: -5.959999999999917
  episodes_this_iter: 2
  episodes_total: 2718
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.660457923796494e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8638039237923092
          entropy_coeff: 0.009999999999999998
          kl: 0.01971476516336524
          policy_loss: 0.027570559084415434
          total_loss: 0.026417666922012965
          vf_explained_var: 0.7452858090400696
          vf_loss: 0.007485145186850181
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,735,31963.7,735000,-2.8815,-2.12,-5.96,288.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-25_01-24-11
  done: false
  episode_len_mean: 292.88
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.9287999999999816
  episode_reward_min: -6.7999999999998995
  episodes_this_iter: 2
  episodes_total: 2720
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.660457923796494e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8952372358904944
          entropy_coeff: 0.009999999999999998
          kl: 0.012787690567954257
          policy_loss: 0.03157716509368685
          total_loss: 0.028079730768998463
          vf_explained_var: 0.6488028168678284
          vf_loss: 0.005454937122865684
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 73600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,736,31987.5,736000,-2.9288,-2.12,-6.8,292.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-25_01-24-29
  done: false
  episode_len_mean: 296.24
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.9623999999999806
  episode_reward_min: -6.7999999999998995
  episodes_this_iter: 1
  episodes_total: 2721
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.660457923796494e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0160647564464145
          entropy_coeff: 0.009999999999999998
          kl: 0.017007428042658052
          policy_loss: -0.030890542682674195
          total_loss: -0.03519652212659518
          vf_explained_var: 0.5932350754737854
          vf_loss: 0.005854666453605104
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,737,32005.9,737000,-2.9624,-2.12,-6.8,296.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-25_01-24-51
  done: false
  episode_len_mean: 302.76
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.02759999999998
  episode_reward_min: -6.7999999999998995
  episodes_this_iter: 2
  episodes_total: 2723
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.660457923796494e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0388355645868512
          entropy_coeff: 0.009999999999999998
          kl: 0.03992823112284161
          policy_loss: -0.08171632472011778
          total_loss: -0.08041909138361612
          vf_explained_var: -0.13671734929084778
          vf_loss: 0.011685581510472628
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 73800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,738,32028.1,738000,-3.0276,-2.12,-6.8,302.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-25_01-25-16
  done: false
  episode_len_mean: 307.23
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.072299999999978
  episode_reward_min: -6.7999999999998995
  episodes_this_iter: 2
  episodes_total: 2725
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4906868856947407e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1361071017053392
          entropy_coeff: 0.009999999999999998
          kl: 0.03893279774164616
          policy_loss: -0.09968413478798337
          total_loss: -0.10009252164098952
          vf_explained_var: 0.11476677656173706
          vf_loss: 0.010952676427809314
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 7390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,739,32052.2,739000,-3.0723,-2.12,-6.8,307.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-25_01-25-30
  done: false
  episode_len_mean: 311.43
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.1142999999999774
  episode_reward_min: -6.7999999999998995
  episodes_this_iter: 1
  episodes_total: 2726
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7360303285421123e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.30465244816409215
          entropy_coeff: 0.009999999999999998
          kl: 0.0012343847929000636
          policy_loss: -0.05103802680969238
          total_loss: -0.04788126481903924
          vf_explained_var: -0.12347545474767685
          vf_loss: 0.0062032828563436246
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,740,32066.6,740000,-3.1143,-2.12,-6.8,311.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-25_01-25-45
  done: false
  episode_len_mean: 317.46
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.174599999999976
  episode_reward_min: -8.719999999999859
  episodes_this_iter: 1
  episodes_total: 2727
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8680151642710562e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.343734263545937
          entropy_coeff: 0.009999999999999998
          kl: 0.00283244465885054
          policy_loss: -0.056960789528157973
          total_loss: -0.05363557810584704
          vf_explained_var: -0.05402277410030365
          vf_loss: 0.006762553060333529
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 7410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,741,32081.3,741000,-3.1746,-2.12,-8.72,317.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-25_01-25-59
  done: false
  episode_len_mean: 329.48
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.2947999999999733
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2729
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.340075821355281e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4359968238406711
          entropy_coeff: 0.009999999999999998
          kl: 0.049808958610439866
          policy_loss: 0.09250421788957384
          total_loss: 0.09514914751052857
          vf_explained_var: 0.03515178710222244
          vf_loss: 0.0070048962530967806
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,742,32095.5,742000,-3.2948,-2.12,-9.08,329.48




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-25_01-26-36
  done: false
  episode_len_mean: 332.05
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.3204999999999734
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 1
  episodes_total: 2730
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4010113732032919e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5481229042013486
          entropy_coeff: 0.009999999999999998
          kl: 0.0030918383468594535
          policy_loss: -0.0755894236266613
          total_loss: -0.07400827200876342
          vf_explained_var: 0.270017147064209
          vf_loss: 0.007062379397717046
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,743,32132.5,743000,-3.3205,-2.12,-9.08,332.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-25_01-26-56
  done: false
  episode_len_mean: 339.84
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.398399999999971
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2732
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.005056866016459e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6128880595167477
          entropy_coeff: 0.009999999999999998
          kl: 0.03099781516725919
          policy_loss: 0.10509919366902776
          total_loss: 0.10557558039824168
          vf_explained_var: 0.08873597532510757
          vf_loss: 0.006605264526216261
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,744,32152.5,744000,-3.3984,-2.12,-9.08,339.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-25_01-27-14
  done: false
  episode_len_mean: 343.42
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.4341999999999704
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 1
  episodes_total: 2733
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6894512481159634
          entropy_coeff: 0.009999999999999998
          kl: 0.014898814945408706
          policy_loss: -0.05257330884536107
          total_loss: -0.052578941980997725
          vf_explained_var: 0.5468963384628296
          vf_loss: 0.006888881219977824
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 7450

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,745,32170.6,745000,-3.4342,-2.12,-9.08,343.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-25_01-27-32
  done: false
  episode_len_mean: 352.5
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.524999999999969
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2735
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6967396312289768
          entropy_coeff: 0.009999999999999998
          kl: 0.01574156402290296
          policy_loss: 0.09387131813499662
          total_loss: 0.09469343158933852
          vf_explained_var: 0.036465439945459366
          vf_loss: 0.007789505279571232
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,746,32188.2,746000,-3.525,-2.12,-9.08,352.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-25_01-27-50
  done: false
  episode_len_mean: 357.0
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.5699999999999683
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 1
  episodes_total: 2736
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.644559735722012
          entropy_coeff: 0.009999999999999998
          kl: 0.014226097393012083
          policy_loss: -0.057964732746283214
          total_loss: -0.0566544309258461
          vf_explained_var: -0.6021867990493774
          vf_loss: 0.0077558937065380935
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 74700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,747,32206.2,747000,-3.57,-2.12,-9.08,357


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-25_01-28-08
  done: false
  episode_len_mean: 360.72
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.607199999999967
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 1
  episodes_total: 2737
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7276014612780677
          entropy_coeff: 0.009999999999999998
          kl: 0.019504304885455643
          policy_loss: -0.06865572018755807
          total_loss: -0.0680840402841568
          vf_explained_var: 0.0036122985184192657
          vf_loss: 0.007847693100080102
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 7480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,748,32224.3,748000,-3.6072,-2.12,-9.08,360.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-25_01-28-27
  done: false
  episode_len_mean: 368.68
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.686799999999965
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2739
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7790733383761512
          entropy_coeff: 0.009999999999999998
          kl: 0.01370238524250414
          policy_loss: -0.08555963138739268
          total_loss: -0.07919434292448892
          vf_explained_var: -0.18452635407447815
          vf_loss: 0.014156020713400923
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 74900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,749,32243.6,749000,-3.6868,-2.12,-9.08,368.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-25_01-28-46
  done: false
  episode_len_mean: 376.89
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.768899999999965
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2741
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0507585299024688e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.894147867626614
          entropy_coeff: 0.009999999999999998
          kl: 0.02394678845873983
          policy_loss: 0.0897877591351668
          total_loss: 0.08856361798114247
          vf_explained_var: 0.11551133543252945
          vf_loss: 0.0077173365538732875
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,750,32262.7,750000,-3.7689,-2.12,-9.08,376.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-25_01-29-07
  done: false
  episode_len_mean: 380.57
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.805699999999963
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 1
  episodes_total: 2742
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5761377948537033e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9987650560008154
          entropy_coeff: 0.009999999999999998
          kl: 0.023685317247839618
          policy_loss: -0.05600381609466341
          total_loss: -0.05816779120100869
          vf_explained_var: 0.22597122192382812
          vf_loss: 0.00782367119940722
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,751,32283.7,751000,-3.8057,-2.12,-9.08,380.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-25_01-29-31
  done: false
  episode_len_mean: 385.98
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.859799999999962
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2744
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3642066922805548e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.975017120440801
          entropy_coeff: 0.009999999999999998
          kl: 0.022344344036292322
          policy_loss: -0.07799957361486223
          total_loss: -0.07255631006426282
          vf_explained_var: -0.24262294173240662
          vf_loss: 0.015193430287763477
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 75200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,752,32307.3,752000,-3.8598,-2.12,-9.08,385.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-25_01-29-55
  done: false
  episode_len_mean: 390.72
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.907199999999961
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2746
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5463100384208324e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9336246053377787
          entropy_coeff: 0.009999999999999998
          kl: 0.015632517635851034
          policy_loss: -0.06476252575715383
          total_loss: -0.05883428090148502
          vf_explained_var: -0.2943521738052368
          vf_loss: 0.015264487130722652
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 75300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,753,32331.7,753000,-3.9072,-2.12,-9.08,390.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-25_01-30-22
  done: false
  episode_len_mean: 396.82
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -3.968199999999959
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 3
  episodes_total: 2749
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5463100384208324e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9216837412781186
          entropy_coeff: 0.009999999999999998
          kl: 0.01620464140606433
          policy_loss: 0.038692453503608705
          total_loss: 0.04381329036421246
          vf_explained_var: -0.08789069950580597
          vf_loss: 0.01433767431622578
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,754,32358.1,754000,-3.9682,-2.12,-9.08,396.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-25_01-30-47
  done: false
  episode_len_mean: 400.9
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.008999999999959
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2751
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5463100384208324e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9856225377983517
          entropy_coeff: 0.009999999999999998
          kl: 0.023593673751507775
          policy_loss: 0.09336605022350947
          total_loss: 0.09055024882157643
          vf_explained_var: -0.2952866852283478
          vf_loss: 0.0070404164302292176
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,755,32383.2,755000,-4.009,-2.12,-9.08,400.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-25_01-31-15
  done: false
  episode_len_mean: 405.04
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.050399999999957
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2753
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31946505763125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9775169412295024
          entropy_coeff: 0.009999999999999998
          kl: 0.012640742478129892
          policy_loss: -0.0855588843425115
          total_loss: -0.0815545603632927
          vf_explained_var: -0.22446668148040771
          vf_loss: 0.013779485841385192
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,756,32411,756000,-4.0504,-2.12,-9.08,405.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-25_01-31-44
  done: false
  episode_len_mean: 409.67
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.096699999999956
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 3
  episodes_total: 2756
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31946505763125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0133171180884044
          entropy_coeff: 0.009999999999999998
          kl: 0.014606115307499183
          policy_loss: 0.02667701476150089
          total_loss: 0.029978954709238476
          vf_explained_var: 0.06972706317901611
          vf_loss: 0.013435109404640065
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,757,32439.9,757000,-4.0967,-2.12,-9.08,409.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-25_01-32-13
  done: false
  episode_len_mean: 413.19
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.1318999999999555
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2758
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31946505763125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9572761548890008
          entropy_coeff: 0.009999999999999998
          kl: 0.008570465472149201
          policy_loss: -0.07853446784946654
          total_loss: -0.07603455748822954
          vf_explained_var: -0.2464417815208435
          vf_loss: 0.012072669287833074
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,758,32468.8,758000,-4.1319,-2.12,-9.08,413.19




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-25_01-32-59
  done: false
  episode_len_mean: 416.93
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.169299999999955
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 3
  episodes_total: 2761
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31946505763125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8749794920285543
          entropy_coeff: 0.009999999999999998
          kl: 0.022582038033828133
          policy_loss: 0.040007352415058345
          total_loss: 0.04313489836123255
          vf_explained_var: -0.23772461712360382
          vf_loss: 0.01187732761285992
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,759,32514.6,759000,-4.1693,-2.12,-9.08,416.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-25_01-33-31
  done: false
  episode_len_mean: 418.67
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.186699999999955
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 3
  episodes_total: 2764
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.979197586446876e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6959937307569716
          entropy_coeff: 0.009999999999999998
          kl: 0.02235291956148602
          policy_loss: 0.02731792281071345
          total_loss: 0.03397008876005809
          vf_explained_var: 0.0793653205037117
          vf_loss: 0.013612086477223784
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,760,32547,760000,-4.1867,-2.12,-9.08,418.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-25_01-34-03
  done: false
  episode_len_mean: 419.54
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -4.195399999999954
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 2
  episodes_total: 2766
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1968796379670313e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5932543214824465
          entropy_coeff: 0.009999999999999998
          kl: 0.03830947274766001
          policy_loss: -0.10068621850676007
          total_loss: -0.09224600444237391
          vf_explained_var: -0.19162172079086304
          vf_loss: 0.014372716318919426
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 76100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,761,32579.3,761000,-4.1954,-2.12,-9.08,419.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-25_01-34-45
  done: false
  episode_len_mean: 421.21
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.212099999999954
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2770
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7953194569505466e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.37469054990344575
          entropy_coeff: 0.009999999999999998
          kl: 0.019892929075787595
          policy_loss: -0.0005763559705681271
          total_loss: 0.012798315783341726
          vf_explained_var: 0.11394979804754257
          vf_loss: 0.01712154264872273
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 7620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,762,32620.9,762000,-4.2121,-2.24,-9.08,421.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-25_01-35-28
  done: false
  episode_len_mean: 420.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.204799999999954
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2774
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7953194569505466e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3275248159137037
          entropy_coeff: 0.009999999999999998
          kl: 0.041936413830467874
          policy_loss: 0.022637647059228685
          total_loss: 0.0362574459777938
          vf_explained_var: 0.03456181287765503
          vf_loss: 0.01689497445606523
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,763,32664,763000,-4.2048,-2.24,-9.08,420.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-25_01-36-09
  done: false
  episode_len_mean: 420.88
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.208799999999954
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2778
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6929791854258205e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7107373992602031
          entropy_coeff: 0.009999999999999998
          kl: 0.05511509274152944
          policy_loss: 0.01712281381090482
          total_loss: 0.02656589104897446
          vf_explained_var: 0.08509347587823868
          vf_loss: 0.016550302277836534
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,764,32704.8,764000,-4.2088,-2.24,-9.08,420.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-25_01-36-53
  done: false
  episode_len_mean: 419.67
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.196699999999954
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2782
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.03946877813873e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3063725855615404
          entropy_coeff: 0.009999999999999998
          kl: 0.005304711190582465
          policy_loss: 0.013011038055022557
          total_loss: 0.024892213775051963
          vf_explained_var: 0.12624478340148926
          vf_loss: 0.014944879865894715
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,765,32748.8,765000,-4.1967,-2.24,-9.08,419.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-25_01-37-34
  done: false
  episode_len_mean: 418.3
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.1829999999999545
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2786
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.03946877813873e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.37839882175127665
          entropy_coeff: 0.009999999999999998
          kl: 0.015072893300535186
          policy_loss: 0.002594229992893007
          total_loss: 0.014849996566772461
          vf_explained_var: 0.14091305434703827
          vf_loss: 0.01603969346938862
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,766,32790.1,766000,-4.183,-2.24,-9.08,418.3




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-25_01-38-33
  done: false
  episode_len_mean: 414.87
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.148699999999955
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2790
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.03946877813873e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.43264102737108867
          entropy_coeff: 0.009999999999999998
          kl: 0.009122427028125083
          policy_loss: 0.019171501944462457
          total_loss: 0.029408384445640777
          vf_explained_var: 0.14757908880710602
          vf_loss: 0.01456326176929805
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,767,32848.9,767000,-4.1487,-2.07,-9.08,414.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-25_01-39-19
  done: false
  episode_len_mean: 410.76
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.107599999999955
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2794
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.03946877813873e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5902127560642031
          entropy_coeff: 0.009999999999999998
          kl: 0.11298300160898439
          policy_loss: -0.004850239637825224
          total_loss: 0.0021191559731960296
          vf_explained_var: 0.2816903293132782
          vf_loss: 0.01287106782094472
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,768,32895,768000,-4.1076,-2.07,-9.08,410.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-25_01-40-00
  done: false
  episode_len_mean: 408.87
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.088699999999956
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 3
  episodes_total: 2797
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.059203167208097e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.33921315107080674
          entropy_coeff: 0.009999999999999998
          kl: 0.010200657848875099
          policy_loss: -0.08095825711886089
          total_loss: -0.07125927143626742
          vf_explained_var: 0.14284366369247437
          vf_loss: 0.013091061274624533
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,769,32935.6,769000,-4.0887,-2.07,-9.08,408.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-25_01-40-44
  done: false
  episode_len_mean: 406.73
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.067299999999958
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2801
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.059203167208097e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.25333979427814485
          entropy_coeff: 0.009999999999999998
          kl: 0.016779959020095773
          policy_loss: -0.05655885653363334
          total_loss: -0.04509523924854067
          vf_explained_var: 0.060048796236515045
          vf_loss: 0.013996912507961194
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,770,32979.6,770000,-4.0673,-2.07,-9.08,406.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-25_01-41-31
  done: false
  episode_len_mean: 404.15
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.0414999999999575
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2805
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.059203167208097e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.14466544737418494
          entropy_coeff: 0.009999999999999998
          kl: 0.007249403814686654
          policy_loss: -0.13102565639548833
          total_loss: -0.11776390158467823
          vf_explained_var: 0.26505446434020996
          vf_loss: 0.014708372903987766
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,771,33026.3,771000,-4.0415,-2.07,-9.08,404.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-25_01-42-20
  done: false
  episode_len_mean: 400.27
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -4.002699999999959
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 5
  episodes_total: 2810
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.059203167208097e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1252696307996909
          entropy_coeff: 0.009999999999999998
          kl: 0.0007025336753342455
          policy_loss: 0.009524548633231057
          total_loss: 0.02455402778254615
          vf_explained_var: 0.058758463710546494
          vf_loss: 0.016282172594219445
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,772,33075.9,772000,-4.0027,-2.07,-9.08,400.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-25_01-43-10
  done: false
  episode_len_mean: 395.3
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -3.95299999999996
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2814
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0296015836040485e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1085529680053393
          entropy_coeff: 0.009999999999999998
          kl: 0.0006087849037784723
          policy_loss: -0.021693635235230126
          total_loss: -0.010059934854507447
          vf_explained_var: 0.16391785442829132
          vf_loss: 0.012719230353832245
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,773,33125.8,773000,-3.953,-2.07,-9.08,395.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-25_01-43-57
  done: false
  episode_len_mean: 383.87
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -3.838699999999962
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 5
  episodes_total: 2819
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5148007918020243e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1759741307960616
          entropy_coeff: 0.009999999999999998
          kl: 0.006874867213758623
          policy_loss: -0.00844223342008061
          total_loss: 0.006703883740637038
          vf_explained_var: 0.2115296721458435
          vf_loss: 0.016905848919931386
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,774,33172.3,774000,-3.8387,-2.07,-9.08,383.87




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-25_01-45-05
  done: false
  episode_len_mean: 370.86
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.708599999999965
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 4
  episodes_total: 2823
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5148007918020243e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.30965036153793335
          entropy_coeff: 0.009999999999999998
          kl: 0.011333552296846329
          policy_loss: -0.05387475950022538
          total_loss: -0.044873866438865664
          vf_explained_var: 0.3522551953792572
          vf_loss: 0.012097380667304
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,775,33240.8,775000,-3.7086,-1.99,-9.08,370.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-25_01-45-51
  done: false
  episode_len_mean: 348.72
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.487199999999969
  episode_reward_min: -9.07999999999985
  episodes_this_iter: 5
  episodes_total: 2828
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5148007918020243e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5479072468148337
          entropy_coeff: 0.009999999999999998
          kl: 0.09260674719886153
          policy_loss: 0.025993465839160814
          total_loss: 0.02960038727356328
          vf_explained_var: 0.6514425873756409
          vf_loss: 0.009085858225201566
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,776,33286.8,776000,-3.4872,-1.99,-9.08,348.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-25_01-46-32
  done: false
  episode_len_mean: 334.78
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.3477999999999724
  episode_reward_min: -7.339999999999888
  episodes_this_iter: 3
  episodes_total: 2831
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.272201187703036e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8328128059705099
          entropy_coeff: 0.009999999999999998
          kl: 0.03777598786497776
          policy_loss: -0.14650615370935863
          total_loss: -0.14520223314563432
          vf_explained_var: 0.7067441940307617
          vf_loss: 0.009631968548314438
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,777,33327.9,777000,-3.3478,-1.99,-7.34,334.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-25_01-47-11
  done: false
  episode_len_mean: 319.86
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.198599999999975
  episode_reward_min: -7.119999999999893
  episodes_this_iter: 4
  episodes_total: 2835
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4083017815545534e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.148077502515581
          entropy_coeff: 0.009999999999999998
          kl: 0.05548314898091367
          policy_loss: 0.0200821116566658
          total_loss: 0.016988392257028157
          vf_explained_var: 0.6235653162002563
          vf_loss: 0.008386868978333142
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,778,33366.8,778000,-3.1986,-1.99,-7.12,319.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-25_01-47-45
  done: false
  episode_len_mean: 308.61
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.0860999999999787
  episode_reward_min: -6.719999999999901
  episodes_this_iter: 3
  episodes_total: 2838
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.11245267233183e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.40383327404658
          entropy_coeff: 0.009999999999999998
          kl: 0.02620536527978672
          policy_loss: 0.010766678800185522
          total_loss: 0.0037582425193654167
          vf_explained_var: 0.4340691566467285
          vf_loss: 0.007029764995806747
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 779000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,779,33400.2,779000,-3.0861,-1.99,-6.72,308.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-25_01-48-19
  done: false
  episode_len_mean: 299.36
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9935999999999803
  episode_reward_min: -6.2799999999999105
  episodes_this_iter: 3
  episodes_total: 2841
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.668679008497742e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.544511369864146
          entropy_coeff: 0.009999999999999998
          kl: 0.021878727749904195
          policy_loss: -0.015181218418810103
          total_loss: -0.024380552189217673
          vf_explained_var: 0.5041948556900024
          vf_loss: 0.006245613280528536
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,780,33434.5,780000,-2.9936,-1.99,-6.28,299.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-25_01-48-53
  done: false
  episode_len_mean: 292.56
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9255999999999807
  episode_reward_min: -5.1099999999999355
  episodes_this_iter: 3
  episodes_total: 2844
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.150301851274662e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6550662159919738
          entropy_coeff: 0.009999999999999998
          kl: 0.046703414104294
          policy_loss: 0.022722282343440585
          total_loss: 0.013560098078515794
          vf_explained_var: 0.22323822975158691
          vf_loss: 0.007387937146187243
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 781000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,781,33468.2,781000,-2.9256,-1.99,-5.11,292.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-25_01-49-24
  done: false
  episode_len_mean: 288.65
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.886499999999982
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2847
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.725452776911993e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.569238747490777
          entropy_coeff: 0.009999999999999998
          kl: 0.015972442884636628
          policy_loss: -0.005250676804118686
          total_loss: -0.012741304768456353
          vf_explained_var: 0.07324399054050446
          vf_loss: 0.008201482866166367
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 7820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,782,33499.9,782000,-2.8865,-1.99,-4.68,288.65




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-25_01-50-11
  done: false
  episode_len_mean: 284.86
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.848599999999983
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2850
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.725452776911993e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.448562777042389
          entropy_coeff: 0.009999999999999998
          kl: 0.02255186338660028
          policy_loss: 0.06245224293735292
          total_loss: 0.056024132751756245
          vf_explained_var: 0.4145169258117676
          vf_loss: 0.008057127754566157
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,783,33546.1,783000,-2.8486,-1.99,-4.68,284.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-25_01-50-49
  done: false
  episode_len_mean: 281.14
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.8113999999999844
  episode_reward_min: -4.399999999999951
  episodes_this_iter: 3
  episodes_total: 2853
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6010671416918436
          entropy_coeff: 0.009999999999999998
          kl: 0.0436454158762534
          policy_loss: 0.0527570567611191
          total_loss: 0.04436854078537888
          vf_explained_var: 0.09281274676322937
          vf_loss: 0.007621026524187376
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,784,33584.4,784000,-2.8114,-1.99,-4.4,281.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-25_01-51-25
  done: false
  episode_len_mean: 278.95
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.789499999999984
  episode_reward_min: -4.399999999999951
  episodes_this_iter: 3
  episodes_total: 2856
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.882268748051983e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6214887075954014
          entropy_coeff: 0.009999999999999998
          kl: 0.019962894047886297
          policy_loss: -0.02697302020258374
          total_loss: -0.03493592722548379
          vf_explained_var: -0.025469409301877022
          vf_loss: 0.008251204577067659
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 785

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,785,33620.5,785000,-2.7895,-1.99,-4.4,278.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-25_01-51-56
  done: false
  episode_len_mean: 277.51
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.775099999999986
  episode_reward_min: -4.399999999999951
  episodes_this_iter: 3
  episodes_total: 2859
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.882268748051983e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.755473287900289
          entropy_coeff: 0.009999999999999998
          kl: 0.020978594937409334
          policy_loss: -0.015709962364700104
          total_loss: -0.022833848082356983
          vf_explained_var: 0.2818075120449066
          vf_loss: 0.01043003247016006
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,786,33651.6,786000,-2.7751,-1.99,-4.4,277.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-25_01-52-30
  done: false
  episode_len_mean: 275.53
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.7552999999999854
  episode_reward_min: -3.9599999999999596
  episodes_this_iter: 3
  episodes_total: 2862
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.823403122077974e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.663728380203247
          entropy_coeff: 0.009999999999999998
          kl: 0.023355011847278954
          policy_loss: 0.03150539439585474
          total_loss: 0.02338505850897895
          vf_explained_var: 0.26888954639434814
          vf_loss: 0.00851558710500184
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 787000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,787,33685.7,787000,-2.7553,-1.99,-3.96,275.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-25_01-53-06
  done: false
  episode_len_mean: 275.29
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.752899999999985
  episode_reward_min: -3.9599999999999596
  episodes_this_iter: 3
  episodes_total: 2865
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.735104683116964e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6078607704904344
          entropy_coeff: 0.009999999999999998
          kl: 0.020802392601725315
          policy_loss: -0.021448822650644513
          total_loss: -0.026011009679900277
          vf_explained_var: -0.2514176070690155
          vf_loss: 0.011514599987357441
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,788,33721.6,788000,-2.7529,-1.99,-3.96,275.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-25_01-53-41
  done: false
  episode_len_mean: 274.61
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.7460999999999847
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2868
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.5631346477402581
          entropy_coeff: 0.009999999999999998
          kl: 0.033730775722417394
          policy_loss: -0.022460671762625378
          total_loss: -0.02739092124005159
          vf_explained_var: -0.1590500771999359
          vf_loss: 0.010696677710964449
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,789,33756.4,789000,-2.7461,-1.99,-3.86,274.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-25_01-54-16
  done: false
  episode_len_mean: 276.82
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.768199999999985
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2871
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019653985537013165
          cur_lr: 5.000000000000001e-05
          entropy: 1.541853454377916
          entropy_coeff: 0.009999999999999998
          kl: 0.021314585339495117
          policy_loss: -0.07924944832921028
          total_loss: -0.0830541821817557
          vf_explained_var: 0.252885639667511
          vf_loss: 0.011609608452353213
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 790000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,790,33791,790000,-2.7682,-1.99,-3.86,276.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-25_01-54-52
  done: false
  episode_len_mean: 279.14
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.7913999999999843
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2874
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002948097830551975
          cur_lr: 5.000000000000001e-05
          entropy: 1.5638839311069912
          entropy_coeff: 0.009999999999999998
          kl: 0.020383805213073536
          policy_loss: -0.13254563634594282
          total_loss: -0.13634937148955134
          vf_explained_var: 0.4635098874568939
          vf_loss: 0.011829090304672718
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 7910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,791,33827.5,791000,-2.7914,-1.99,-3.86,279.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-25_01-55-24
  done: false
  episode_len_mean: 281.05
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.810499999999983
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2877
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.6199537012312146
          entropy_coeff: 0.009999999999999998
          kl: 0.03031656667425175
          policy_loss: -0.12335935081872675
          total_loss: -0.12594342242098516
          vf_explained_var: 0.33727824687957764
          vf_loss: 0.01360206357203424
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,792,33859.4,792000,-2.8105,-1.99,-3.86,281.05




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-25_01-56-10
  done: false
  episode_len_mean: 283.84
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.838399999999983
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2880
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006633220118741944
          cur_lr: 5.000000000000001e-05
          entropy: 1.5447889937294854
          entropy_coeff: 0.009999999999999998
          kl: 0.024336802402289238
          policy_loss: -0.06860086073478062
          total_loss: -0.07109301802184846
          vf_explained_var: 0.24103009700775146
          vf_loss: 0.01293958858328147
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 79300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,793,33904.9,793000,-2.8384,-1.99,-3.86,283.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-25_01-56-46
  done: false
  episode_len_mean: 286.67
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.8666999999999825
  episode_reward_min: -3.8599999999999617
  episodes_this_iter: 3
  episodes_total: 2883
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 1.5613294654422336
          entropy_coeff: 0.009999999999999998
          kl: 0.02897370619185774
          policy_loss: 0.018355199694633485
          total_loss: 0.01192218561967214
          vf_explained_var: -0.0015185342635959387
          vf_loss: 0.009151452701512932
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,794,33941.1,794000,-2.8667,-1.99,-3.86,286.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-25_01-57-17
  done: false
  episode_len_mean: 289.44
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.8943999999999823
  episode_reward_min: -3.92999999999996
  episodes_this_iter: 3
  episodes_total: 2886
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014924745267169376
          cur_lr: 5.000000000000001e-05
          entropy: 1.5350092079904345
          entropy_coeff: 0.009999999999999998
          kl: 0.02474487450555386
          policy_loss: 0.027601588434643217
          total_loss: 0.022389962938096787
          vf_explained_var: 0.3885636627674103
          vf_loss: 0.010101536852825018
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,795,33971.9,795000,-2.8944,-1.99,-3.93,289.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-25_01-57-47
  done: false
  episode_len_mean: 292.22
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9221999999999815
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 2
  episodes_total: 2888
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022387117900754053
          cur_lr: 5.000000000000001e-05
          entropy: 1.4919310688972474
          entropy_coeff: 0.009999999999999998
          kl: 0.018476606989676566
          policy_loss: -0.10091638962427775
          total_loss: -0.10463307831022474
          vf_explained_var: -0.046810273081064224
          vf_loss: 0.0111612549982965
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 7960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,796,34001.8,796000,-2.9222,-1.99,-4.12,292.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-25_01-58-20
  done: false
  episode_len_mean: 295.6
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9559999999999813
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2891
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022387117900754053
          cur_lr: 5.000000000000001e-05
          entropy: 1.4603437476687962
          entropy_coeff: 0.009999999999999998
          kl: 0.027011051546957276
          policy_loss: -0.0979890020771159
          total_loss: -0.09696924549837907
          vf_explained_var: 0.13893429934978485
          vf_loss: 0.01556272003799677
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,797,34035,797000,-2.956,-1.99,-4.12,295.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-25_01-58-57
  done: false
  episode_len_mean: 297.49
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9748999999999803
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 4
  episodes_total: 2895
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0033580676851131088
          cur_lr: 5.000000000000001e-05
          entropy: 1.4790909038649664
          entropy_coeff: 0.009999999999999998
          kl: 0.023335792701372544
          policy_loss: -0.038652418678005535
          total_loss: -0.04106547397871812
          vf_explained_var: 0.3203679323196411
          vf_loss: 0.012299490492377016
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 7980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,798,34071.6,798000,-2.9749,-1.99,-4.12,297.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-25_01-59-31
  done: false
  episode_len_mean: 299.09
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9908999999999804
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2898
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 1.5733982549773322
          entropy_coeff: 0.009999999999999998
          kl: 0.0397950271874273
          policy_loss: -0.005065987424718009
          total_loss: -0.016041604181130726
          vf_explained_var: 0.6215142011642456
          vf_loss: 0.004557912135755436
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,799,34106.3,799000,-2.9909,-1.99,-4.12,299.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-25_02-00-05
  done: false
  episode_len_mean: 301.98
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.019799999999979
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2901
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007555652291504495
          cur_lr: 5.000000000000001e-05
          entropy: 1.5769382860925463
          entropy_coeff: 0.009999999999999998
          kl: 0.03412553598173946
          policy_loss: -0.017401033143202464
          total_loss: -0.0276564445760515
          vf_explained_var: 0.7494943141937256
          vf_loss: 0.005256129858187504
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,800,34140.1,800000,-3.0198,-1.99,-4.12,301.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-25_02-00-41
  done: false
  episode_len_mean: 303.85
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.0384999999999787
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2904
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01133347843725674
          cur_lr: 5.000000000000001e-05
          entropy: 1.6249180807007684
          entropy_coeff: 0.009999999999999998
          kl: 0.022997939773307743
          policy_loss: -0.04936442838774787
          total_loss: -0.05517401877376768
          vf_explained_var: 0.7161780595779419
          vf_loss: 0.01017894773847527
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,801,34175.5,801000,-3.0385,-1.99,-4.12,303.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-25_02-01-15
  done: false
  episode_len_mean: 307.08
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.0707999999999775
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2907
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 1.6742394288380942
          entropy_coeff: 0.009999999999999998
          kl: 0.019145108067204496
          policy_loss: -0.03671161946323183
          total_loss: -0.04761288579967287
          vf_explained_var: 0.7577926516532898
          vf_loss: 0.00551565640942297
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,802,34209.3,802000,-3.0708,-1.99,-4.12,307.08




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-25_02-02-03
  done: false
  episode_len_mean: 310.95
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.109499999999977
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2910
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 1.5354005601671008
          entropy_coeff: 0.009999999999999998
          kl: 0.01374647079321664
          policy_loss: -0.002657550987270143
          total_loss: -0.011697052419185639
          vf_explained_var: 0.7420276999473572
          vf_loss: 0.006080811944816055
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,803,34257.4,803000,-3.1095,-1.99,-4.12,310.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-25_02-02-36
  done: false
  episode_len_mean: 313.58
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.135799999999976
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 2
  episodes_total: 2912
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 1.4840380628903707
          entropy_coeff: 0.009999999999999998
          kl: 0.01532471639867444
          policy_loss: -0.15901218362980418
          total_loss: -0.1670464581913418
          vf_explained_var: 0.703785240650177
          vf_loss: 0.006545582108406557
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,804,34290.9,804000,-3.1358,-1.99,-4.12,313.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-25_02-03-08
  done: false
  episode_len_mean: 318.17
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.1816999999999758
  episode_reward_min: -4.119999999999957
  episodes_this_iter: 3
  episodes_total: 2915
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 1.4991908722453646
          entropy_coeff: 0.009999999999999998
          kl: 0.022518410369805508
          policy_loss: 0.01765236192279392
          total_loss: 0.011548346032698949
          vf_explained_var: 0.27316176891326904
          vf_loss: 0.008505073609477323
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 805000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,805,34322.5,805000,-3.1817,-1.99,-4.12,318.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-25_02-03-36
  done: false
  episode_len_mean: 322.66
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.2265999999999746
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 2
  episodes_total: 2917
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025500326483827666
          cur_lr: 5.000000000000001e-05
          entropy: 1.3069585310088263
          entropy_coeff: 0.009999999999999998
          kl: 0.02292971920740106
          policy_loss: -0.11455442243152195
          total_loss: -0.11891754385497835
          vf_explained_var: 0.2105465978384018
          vf_loss: 0.008121752380652146
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 806000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,806,34350.7,806000,-3.2266,-1.99,-4.52,322.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-25_02-04-07
  done: false
  episode_len_mean: 327.68
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.2767999999999735
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2920
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038250489725741514
          cur_lr: 5.000000000000001e-05
          entropy: 1.5517201317681206
          entropy_coeff: 0.009999999999999998
          kl: 0.016143708143831266
          policy_loss: 0.0694468370742268
          total_loss: 0.06313195907407337
          vf_explained_var: -0.0010497762123122811
          vf_loss: 0.008584817617924676
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,807,34382,807000,-3.2768,-2.24,-4.52,327.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-25_02-04-41
  done: false
  episode_len_mean: 331.31
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.3130999999999737
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2923
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038250489725741514
          cur_lr: 5.000000000000001e-05
          entropy: 1.6269607914818658
          entropy_coeff: 0.009999999999999998
          kl: 0.012467165750957557
          policy_loss: 0.07939093526866702
          total_loss: 0.07174470590220558
          vf_explained_var: 0.490927129983902
          vf_loss: 0.008146499637385002
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 808000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,808,34416,808000,-3.3131,-2.28,-4.52,331.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-25_02-05-15
  done: false
  episode_len_mean: 334.62
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.346199999999973
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2926
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038250489725741514
          cur_lr: 5.000000000000001e-05
          entropy: 1.742487010690901
          entropy_coeff: 0.009999999999999998
          kl: 0.023799660088434257
          policy_loss: 0.09237948440843158
          total_loss: 0.08342917271786265
          vf_explained_var: 0.3605450391769409
          vf_loss: 0.007564209026491476
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,809,34449.7,809000,-3.3462,-2.32,-4.52,334.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-25_02-05-51
  done: false
  episode_len_mean: 337.33
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.3732999999999715
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2929
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05737573458861226
          cur_lr: 5.000000000000001e-05
          entropy: 1.7421201586723327
          entropy_coeff: 0.009999999999999998
          kl: 0.015641343696735373
          policy_loss: 0.053866249033146435
          total_loss: 0.045833234406179854
          vf_explained_var: -0.02267586626112461
          vf_loss: 0.008490755463329454
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 810000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,810,34485.2,810000,-3.3733,-2.48,-4.52,337.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-25_02-06-27
  done: false
  episode_len_mean: 338.36
  episode_media: {}
  episode_reward_max: -2.7999999999999843
  episode_reward_mean: -3.3835999999999715
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2932
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05737573458861226
          cur_lr: 5.000000000000001e-05
          entropy: 1.6727076702647738
          entropy_coeff: 0.009999999999999998
          kl: 0.02123936638558188
          policy_loss: 0.00876532644033432
          total_loss: 0.00416477769613266
          vf_explained_var: 0.060146134346723557
          vf_loss: 0.010907903243787587
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 811000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,811,34521.1,811000,-3.3836,-2.8,-4.52,338.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-25_02-07-02
  done: false
  episode_len_mean: 339.59
  episode_media: {}
  episode_reward_max: -2.7999999999999843
  episode_reward_mean: -3.395899999999971
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2935
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.7320707506603665
          entropy_coeff: 0.009999999999999998
          kl: 0.009934363081034785
          policy_loss: 0.05027535061041514
          total_loss: 0.04411336514684889
          vf_explained_var: 0.05702958256006241
          vf_loss: 0.01030373336462718
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,812,34556.2,812000,-3.3959,-2.8,-4.52,339.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-25_02-07-36
  done: false
  episode_len_mean: 339.37
  episode_media: {}
  episode_reward_max: -2.869999999999983
  episode_reward_mean: -3.393699999999971
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2938
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.7865479800436233
          entropy_coeff: 0.009999999999999998
          kl: 0.011880418002847806
          policy_loss: -0.0033543528781996832
          total_loss: -0.009666809522443347
          vf_explained_var: 0.32395485043525696
          vf_loss: 0.010530551896145981
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 81300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,813,34590.8,813000,-3.3937,-2.87,-4.52,339.37




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-25_02-08-32
  done: false
  episode_len_mean: 338.3
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.382999999999971
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2941
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.7081176082293192
          entropy_coeff: 0.009999999999999998
          kl: 0.015086893755219846
          policy_loss: -0.13366425931453704
          total_loss: -0.1334900614288118
          vf_explained_var: 0.1851601004600525
          vf_loss: 0.01595694264397025
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,814,34646.6,814000,-3.383,-2.63,-4.52,338.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-25_02-09-08
  done: false
  episode_len_mean: 337.4
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.373999999999972
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2945
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.607074941529168
          entropy_coeff: 0.009999999999999998
          kl: 0.016109164475528813
          policy_loss: -0.025649949411551156
          total_loss: -0.026522531939877404
          vf_explained_var: 0.30510279536247253
          vf_loss: 0.013811752758920192
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 815000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,815,34682.8,815000,-3.374,-2.63,-4.52,337.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-25_02-09-44
  done: false
  episode_len_mean: 336.45
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.3644999999999716
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2948
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.6318519247902765
          entropy_coeff: 0.009999999999999998
          kl: 0.013834692211562959
          policy_loss: 0.03227072184284528
          total_loss: 0.02585694698823823
          vf_explained_var: 0.5462141036987305
          vf_loss: 0.008714081529372681
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,816,34718,816000,-3.3645,-2.63,-4.52,336.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-25_02-10-25
  done: false
  episode_len_mean: 335.62
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.356199999999971
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2951
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.660327853096856
          entropy_coeff: 0.009999999999999998
          kl: 0.016937378162117118
          policy_loss: -0.11266037556860183
          total_loss: -0.12016016128990385
          vf_explained_var: 0.5354763269424438
          vf_loss: 0.007645801255582936
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 817000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,817,34758.9,817000,-3.3562,-2.63,-4.52,335.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-25_02-11-02
  done: false
  episode_len_mean: 334.81
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.3480999999999734
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2955
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.543713292810652
          entropy_coeff: 0.009999999999999998
          kl: 0.01726795420270493
          policy_loss: -0.036219258606433866
          total_loss: -0.03774727260073026
          vf_explained_var: 0.4295153319835663
          vf_loss: 0.012422972855468591
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 818000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,818,34795.8,818000,-3.3481,-2.63,-4.52,334.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-25_02-11-41
  done: false
  episode_len_mean: 332.85
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.328499999999973
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2958
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.5831772168477376
          entropy_coeff: 0.009999999999999998
          kl: 0.009828549980407095
          policy_loss: 0.011027074025736914
          total_loss: 0.003283652083741294
          vf_explained_var: 0.5436316728591919
          vf_loss: 0.007242468576360908
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 819000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,819,34835.6,819000,-3.3285,-2.63,-4.52,332.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-25_02-12-22
  done: false
  episode_len_mean: 330.75
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.3074999999999726
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2962
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.5624140501022339
          entropy_coeff: 0.009999999999999998
          kl: 0.02527495116714344
          policy_loss: 0.001595948843492402
          total_loss: -0.0034662396129634646
          vf_explained_var: 0.6269434094429016
          vf_loss: 0.00838669666296078
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,820,34876.2,820000,-3.3075,-2.63,-4.52,330.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-25_02-13-01
  done: false
  episode_len_mean: 329.85
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.2984999999999722
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2965
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1290954028243776
          cur_lr: 5.000000000000001e-05
          entropy: 1.5798556950357225
          entropy_coeff: 0.009999999999999998
          kl: 0.025765867065924652
          policy_loss: 0.04409643109473917
          total_loss: 0.03971983210908042
          vf_explained_var: 0.5779491662979126
          vf_loss: 0.008095703361969854
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,821,34915.4,821000,-3.2985,-2.63,-4.52,329.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-25_02-13-39
  done: false
  episode_len_mean: 328.26
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.2825999999999738
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2969
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5780276589923434
          entropy_coeff: 0.009999999999999998
          kl: 0.00877285991875954
          policy_loss: -0.009667550524075827
          total_loss: -0.015091095036930508
          vf_explained_var: 0.6232447028160095
          vf_loss: 0.008657926057154933
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,822,34953.3,822000,-3.2826,-2.63,-4.52,328.26




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-25_02-14-34
  done: false
  episode_len_mean: 327.42
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.2741999999999734
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2972
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5462586257192823
          entropy_coeff: 0.009999999999999998
          kl: 0.013541482167150162
          policy_loss: 0.07250516207681762
          total_loss: 0.06708716145820087
          vf_explained_var: 0.4512713551521301
          vf_loss: 0.007422372136109819
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,823,35007.9,823000,-3.2742,-2.63,-4.52,327.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-25_02-15-09
  done: false
  episode_len_mean: 327.39
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.2738999999999736
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2975
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.57525757683648
          entropy_coeff: 0.009999999999999998
          kl: 0.016211946187428338
          policy_loss: -0.004042302320400874
          total_loss: -0.0066359409855471715
          vf_explained_var: 0.24916137754917145
          vf_loss: 0.010019602605866061
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,824,35042.9,824000,-3.2739,-2.63,-4.52,327.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-25_02-15-47
  done: false
  episode_len_mean: 325.79
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.257899999999974
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2978
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5547449482811821
          entropy_coeff: 0.009999999999999998
          kl: 0.013600000896239806
          policy_loss: -0.1298810417453448
          total_loss: -0.12791279090775384
          vf_explained_var: 0.21851375699043274
          vf_loss: 0.014882154359171788
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,825,35081.3,825000,-3.2579,-2.63,-4.52,325.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-25_02-16-27
  done: false
  episode_len_mean: 323.48
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.234799999999974
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2982
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.461885964870453
          entropy_coeff: 0.009999999999999998
          kl: 0.008059640568217298
          policy_loss: 0.0255144907368554
          total_loss: 0.022220727718538707
          vf_explained_var: 0.25608155131340027
          vf_loss: 0.009764400652299325
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,826,35121.3,826000,-3.2348,-2.63,-4.52,323.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-25_02-17-07
  done: false
  episode_len_mean: 320.8
  episode_media: {}
  episode_reward_max: -2.629999999999988
  episode_reward_mean: -3.2079999999999744
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2986
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.4612636460198296
          entropy_coeff: 0.009999999999999998
          kl: 0.010260265550475368
          policy_loss: 0.014806818382607567
          total_loss: 0.014299261735545264
          vf_explained_var: 0.1753164529800415
          vf_loss: 0.012118250514484114
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,827,35161.3,827000,-3.208,-2.63,-4.52,320.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-25_02-17-48
  done: false
  episode_len_mean: 317.41
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.1740999999999757
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2989
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.6017496109008789
          entropy_coeff: 0.009999999999999998
          kl: 0.013877846619808861
          policy_loss: -0.016431063123875194
          total_loss: -0.02151097845700052
          vf_explained_var: 0.40482333302497864
          vf_loss: 0.008250229430591895
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,828,35202.2,828000,-3.1741,-2.62,-4.52,317.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-25_02-18-27
  done: false
  episode_len_mean: 314.98
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.149799999999977
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 2993
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5590606782171461
          entropy_coeff: 0.009999999999999998
          kl: 0.011691554245143405
          policy_loss: 0.015613953934775458
          total_loss: 0.012578111680017576
          vf_explained_var: 0.4269084632396698
          vf_loss: 0.010290776037921508
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,829,35241.3,829000,-3.1498,-2.62,-4.52,314.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-25_02-19-04
  done: false
  episode_len_mean: 314.93
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.149299999999976
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 2996
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.6312350935406155
          entropy_coeff: 0.009999999999999998
          kl: 0.014470973391206125
          policy_loss: -0.0037186109357410005
          total_loss: -0.009553178648153941
          vf_explained_var: 0.5127128958702087
          vf_loss: 0.0076755824990363585
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,830,35278,830000,-3.1493,-2.62,-4.52,314.93




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-25_02-19-58
  done: false
  episode_len_mean: 313.17
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.131699999999977
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 3000
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5665113157696193
          entropy_coeff: 0.009999999999999998
          kl: 0.01295593966515991
          policy_loss: -0.021697235935264163
          total_loss: -0.0237507422765096
          vf_explained_var: 0.5008807182312012
          vf_loss: 0.011102778795692657
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,831,35332.1,831000,-3.1317,-2.62,-4.52,313.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-25_02-20-38
  done: false
  episode_len_mean: 311.58
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.115799999999977
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 3003
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.4921011143260532
          entropy_coeff: 0.009999999999999998
          kl: 0.008666443911217866
          policy_loss: 0.018755725026130675
          total_loss: 0.013547862238354153
          vf_explained_var: 0.6399058103561401
          vf_loss: 0.00803495183483594
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 832000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,832,35372.1,832000,-3.1158,-2.62,-4.52,311.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-25_02-21-18
  done: false
  episode_len_mean: 310.03
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.100299999999977
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 3007
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.4323580437236363
          entropy_coeff: 0.009999999999999998
          kl: 0.007272566416334788
          policy_loss: 0.0015406731930043963
          total_loss: -0.00019061043858528137
          vf_explained_var: 0.5205957889556885
          vf_loss: 0.011184010892692539
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,833,35412,833000,-3.1003,-2.62,-4.52,310.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-25_02-22-01
  done: false
  episode_len_mean: 307.66
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.0765999999999782
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 3
  episodes_total: 3010
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.3750289824273851
          entropy_coeff: 0.009999999999999998
          kl: 0.014477285852988213
          policy_loss: -0.10188646324806744
          total_loss: -0.101962800986237
          vf_explained_var: 0.46223556995391846
          vf_loss: 0.01087052292811374
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,834,35454.7,834000,-3.0766,-2.62,-4.52,307.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-25_02-22-42
  done: false
  episode_len_mean: 303.77
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.0376999999999788
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 4
  episodes_total: 3014
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.3435503443082173
          entropy_coeff: 0.009999999999999998
          kl: 0.013662840630339524
          policy_loss: 0.008398778902159797
          total_loss: 0.006032671282688777
          vf_explained_var: 0.6333718299865723
          vf_loss: 0.008423681671006811
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,835,35495.9,835000,-3.0377,-2.62,-4.52,303.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-25_02-23-22
  done: false
  episode_len_mean: 298.05
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -2.98049999999998
  episode_reward_min: -3.799999999999963
  episodes_this_iter: 4
  episodes_total: 3018
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.1883575333489311
          entropy_coeff: 0.009999999999999998
          kl: 0.014218866885336941
          policy_loss: 0.01684895091586643
          total_loss: 0.01918756374054485
          vf_explained_var: 0.5617132782936096
          vf_loss: 0.011468801005846924
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 836000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,836,35535.7,836000,-2.9805,-2.61,-3.8,298.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-25_02-24-05
  done: false
  episode_len_mean: 293.99
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.9398999999999815
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 4
  episodes_total: 3022
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.142722365591261
          entropy_coeff: 0.009999999999999998
          kl: 0.00836386678628966
          policy_loss: 0.022300544960631264
          total_loss: 0.0230038534435961
          vf_explained_var: 0.34083324670791626
          vf_loss: 0.01051092803892162
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,837,35579.1,837000,-2.9399,-2.26,-3.62,293.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-25_02-24-48
  done: false
  episode_len_mean: 291.62
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.9161999999999817
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 3
  episodes_total: 3025
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.0883157915539212
          entropy_coeff: 0.009999999999999998
          kl: 0.012683929515320989
          policy_loss: -0.09055402312013838
          total_loss: -0.0874637200600571
          vf_explained_var: 0.26903200149536133
          vf_loss: 0.011517302557412122
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,838,35621.7,838000,-2.9162,-2.26,-3.62,291.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-25_02-25-31
  done: false
  episode_len_mean: 288.9
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.888999999999983
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 4
  episodes_total: 3029
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.0734668956862556
          entropy_coeff: 0.009999999999999998
          kl: 0.005520990771849278
          policy_loss: -0.02054238584306505
          total_loss: -0.02039340502685971
          vf_explained_var: 0.4310866892337799
          vf_loss: 0.009814546960923407
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,839,35664.3,839000,-2.889,-2.26,-3.62,288.9




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-25_02-26-28
  done: false
  episode_len_mean: 286.92
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.869199999999982
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 4
  episodes_total: 3033
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.0057432015736898
          entropy_coeff: 0.009999999999999998
          kl: 0.009110444055969641
          policy_loss: -0.002818760896722476
          total_loss: 0.0006574240409665638
          vf_explained_var: 0.37345021963119507
          vf_loss: 0.011769443139847782
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 840000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,840,35721.9,840000,-2.8692,-2.26,-3.62,286.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-25_02-27-10
  done: false
  episode_len_mean: 284.31
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.843099999999983
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3037
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.0617437501748404
          entropy_coeff: 0.009999999999999998
          kl: 0.03803951606447467
          policy_loss: 0.0006616909470823076
          total_loss: 0.006866904430919223
          vf_explained_var: 0.4687923789024353
          vf_loss: 0.00945655998463432
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 841000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,841,35764,841000,-2.8431,-2.26,-3.5,284.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-25_02-27-52
  done: false
  episode_len_mean: 283.91
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.8390999999999833
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 3040
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2904646563548496
          cur_lr: 5.000000000000001e-05
          entropy: 0.8268955681059096
          entropy_coeff: 0.009999999999999998
          kl: 0.005669361657256841
          policy_loss: -0.06944897034102016
          total_loss: -0.06470017159978549
          vf_explained_var: 0.3964228332042694
          vf_loss: 0.011371003256903755
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 842000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,842,35805.8,842000,-2.8391,-2.26,-3.5,283.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-25_02-28-32
  done: false
  episode_len_mean: 281.76
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.817599999999984
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3044
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2904646563548496
          cur_lr: 5.000000000000001e-05
          entropy: 0.866926419072681
          entropy_coeff: 0.009999999999999998
          kl: 0.003309207638261253
          policy_loss: 0.0335286205013593
          total_loss: 0.03739499383502536
          vf_explained_var: 0.4013431668281555
          vf_loss: 0.01157442977031072
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,843,35845.3,843000,-2.8176,-2.26,-3.5,281.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-25_02-29-13
  done: false
  episode_len_mean: 280.55
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.8054999999999835
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3048
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1452323281774248
          cur_lr: 5.000000000000001e-05
          entropy: 0.8256967352496253
          entropy_coeff: 0.009999999999999998
          kl: 0.007221732039126513
          policy_loss: -0.01239868187242084
          total_loss: -0.006520419691999753
          vf_explained_var: 0.3376072347164154
          vf_loss: 0.013086402354141076
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,844,35886.6,844000,-2.8055,-2.26,-3.5,280.55


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-25_02-29-57
  done: false
  episode_len_mean: 279.87
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.798699999999984
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 3051
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1452323281774248
          cur_lr: 5.000000000000001e-05
          entropy: 0.7972704046302371
          entropy_coeff: 0.009999999999999998
          kl: 0.00629910823132636
          policy_loss: -0.11395721526609527
          total_loss: -0.10926494970917702
          vf_explained_var: 0.34091806411743164
          vf_loss: 0.011750132549140188
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,845,35931,845000,-2.7987,-2.26,-3.5,279.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-25_02-30-41
  done: false
  episode_len_mean: 277.97
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7796999999999845
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3055
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1452323281774248
          cur_lr: 5.000000000000001e-05
          entropy: 0.7770917461978064
          entropy_coeff: 0.009999999999999998
          kl: 0.005043540111719036
          policy_loss: -0.0767771363672283
          total_loss: -0.07095027081668377
          vf_explained_var: 0.27748027443885803
          vf_loss: 0.012865296720216671
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,846,35974.5,846000,-2.7797,-2.26,-3.5,277.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-25_02-31-22
  done: false
  episode_len_mean: 277.44
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7743999999999853
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3059
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1452323281774248
          cur_lr: 5.000000000000001e-05
          entropy: 0.7568201727337307
          entropy_coeff: 0.009999999999999998
          kl: 0.002890472827800118
          policy_loss: 0.006516973094807731
          total_loss: 0.011852854324711695
          vf_explained_var: 0.26536357402801514
          vf_loss: 0.012484290740556188
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,847,36015.5,847000,-2.7744,-2.26,-3.5,277.44




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-25_02-32-24
  done: false
  episode_len_mean: 276.31
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.763099999999984
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3063
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0726161640887124
          cur_lr: 5.000000000000001e-05
          entropy: 0.7065431687566969
          entropy_coeff: 0.009999999999999998
          kl: 0.004535422122701662
          policy_loss: -0.015845008773936166
          total_loss: -0.01062637832429674
          vf_explained_var: 0.2823117673397064
          vf_loss: 0.011954718548804522
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 848000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,848,36077.6,848000,-2.7631,-2.26,-3.5,276.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-25_02-33-07
  done: false
  episode_len_mean: 274.92
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.749199999999985
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3067
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0363080820443562
          cur_lr: 5.000000000000001e-05
          entropy: 0.763485477368037
          entropy_coeff: 0.009999999999999998
          kl: 0.005242617565165612
          policy_loss: 0.0462196312016911
          total_loss: 0.04946252778172493
          vf_explained_var: 0.2540477216243744
          vf_loss: 0.010687400421334637
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,849,36119.9,849000,-2.7492,-2.26,-3.5,274.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-25_02-33-46
  done: false
  episode_len_mean: 274.28
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7427999999999857
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3071
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0363080820443562
          cur_lr: 5.000000000000001e-05
          entropy: 0.7153471774525113
          entropy_coeff: 0.009999999999999998
          kl: 0.013135959231535328
          policy_loss: 0.003178382416566213
          total_loss: 0.010182652291324404
          vf_explained_var: 0.22198060154914856
          vf_loss: 0.01368079822924402
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,850,36159.8,850000,-2.7428,-2.26,-3.5,274.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-25_02-34-30
  done: false
  episode_len_mean: 272.0
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.719999999999986
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3075
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0363080820443562
          cur_lr: 5.000000000000001e-05
          entropy: 0.6760960552427504
          entropy_coeff: 0.009999999999999998
          kl: 0.006065152545647222
          policy_loss: -0.001950762669245402
          total_loss: 0.004680570132202572
          vf_explained_var: 0.23452945053577423
          vf_loss: 0.013172080874856976
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,851,36202.8,851000,-2.72,-2.26,-3.5,272


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-25_02-35-13
  done: false
  episode_len_mean: 270.58
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.705799999999986
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 3078
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0363080820443562
          cur_lr: 5.000000000000001e-05
          entropy: 0.643084106180403
          entropy_coeff: 0.009999999999999998
          kl: 0.00937242950282003
          policy_loss: -0.10717627770370908
          total_loss: -0.10039627874890963
          vf_explained_var: 0.2924154996871948
          vf_loss: 0.012870546978794866
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,852,36246.4,852000,-2.7058,-2.26,-3.5,270.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-25_02-35-56
  done: false
  episode_len_mean: 269.57
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.695699999999986
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3082
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0363080820443562
          cur_lr: 5.000000000000001e-05
          entropy: 0.5884038971530067
          entropy_coeff: 0.009999999999999998
          kl: 0.004023725457780082
          policy_loss: -0.008874280212654007
          total_loss: -0.0019784127258592184
          vf_explained_var: 0.30301299691200256
          vf_loss: 0.012633812044643693
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 85300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,853,36289.7,853000,-2.6957,-2.26,-3.5,269.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-25_02-36-39
  done: false
  episode_len_mean: 268.48
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.6847999999999868
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3086
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0181540410221781
          cur_lr: 5.000000000000001e-05
          entropy: 0.5912363006008996
          entropy_coeff: 0.009999999999999998
          kl: 0.003065437997016021
          policy_loss: 0.0038196764058536954
          total_loss: 0.010658926185634401
          vf_explained_var: 0.3150697648525238
          vf_loss: 0.012695962325152424
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 854000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,854,36331.9,854000,-2.6848,-2.26,-3.5,268.48




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-25_02-37-39
  done: false
  episode_len_mean: 267.68
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6767999999999863
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3090
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.6838538553979662
          entropy_coeff: 0.009999999999999998
          kl: 0.006223674300037235
          policy_loss: -0.0054387970103157894
          total_loss: 0.00032319401701291404
          vf_explained_var: 0.3140565752983093
          vf_loss: 0.012544038405434953
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,855,36392.6,855000,-2.6768,-2.23,-3.5,267.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-25_02-38-21
  done: false
  episode_len_mean: 267.09
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6708999999999863
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3094
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.7321542845831976
          entropy_coeff: 0.009999999999999998
          kl: 0.01081790961387428
          policy_loss: 0.024160973106821377
          total_loss: 0.029259390922056303
          vf_explained_var: 0.37487539649009705
          vf_loss: 0.012321765327619182
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 856000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,856,36434.3,856000,-2.6709,-2.23,-3.5,267.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-25_02-39-03
  done: false
  episode_len_mean: 265.39
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6538999999999864
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3098
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.7095815036031935
          entropy_coeff: 0.009999999999999998
          kl: 0.0074568877144026244
          policy_loss: 0.03035037728647391
          total_loss: 0.03433621848622958
          vf_explained_var: 0.3721086382865906
          vf_loss: 0.011013971926230524
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 857000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,857,36476.5,857000,-2.6539,-2.23,-3.34,265.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-25_02-39-44
  done: false
  episode_len_mean: 264.81
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.648099999999987
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3102
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.6633821262253655
          entropy_coeff: 0.009999999999999998
          kl: 0.012386249165285159
          policy_loss: 0.0007914990600612429
          total_loss: 0.00648420833879047
          vf_explained_var: 0.36835813522338867
          vf_loss: 0.012214099584768216
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 858000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,858,36517.6,858000,-2.6481,-2.23,-3.34,264.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-25_02-40-26
  done: false
  episode_len_mean: 264.2
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6419999999999875
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3105
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.6272910230689579
          entropy_coeff: 0.009999999999999998
          kl: 0.005109019761376703
          policy_loss: -0.0019165296521451737
          total_loss: 0.0009402059432533053
          vf_explained_var: 0.451945036649704
          vf_loss: 0.009083270152202911
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 85900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,859,36558.7,859000,-2.642,-2.23,-3.34,264.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-25_02-41-05
  done: false
  episode_len_mean: 263.99
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.639899999999987
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3109
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.7068407780594296
          entropy_coeff: 0.009999999999999998
          kl: 0.019711926980554986
          policy_loss: 0.03645986798736784
          total_loss: 0.04119519657558865
          vf_explained_var: 0.40074822306632996
          vf_loss: 0.011624814145680932
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 860000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,860,36598.5,860000,-2.6399,-2.23,-3.34,263.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-25_02-41-47
  done: false
  episode_len_mean: 263.88
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6387999999999874
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3113
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.6698760946591695
          entropy_coeff: 0.009999999999999998
          kl: 0.006754503120004962
          policy_loss: 0.00940632571776708
          total_loss: 0.015361154369182057
          vf_explained_var: 0.3435881435871124
          vf_loss: 0.012592277717259195
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,861,36640.4,861000,-2.6388,-2.23,-3.34,263.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-25_02-42-29
  done: false
  episode_len_mean: 264.21
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6420999999999877
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3116
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.7231109460194906
          entropy_coeff: 0.009999999999999998
          kl: 0.010315398480402428
          policy_loss: -0.04411904381381141
          total_loss: -0.04101827724112405
          vf_explained_var: 0.4058186709880829
          vf_loss: 0.010238241885478299
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,862,36682,862000,-2.6421,-2.23,-3.34,264.21




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-25_02-43-22
  done: false
  episode_len_mean: 264.68
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6467999999999883
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3120
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00907702051108905
          cur_lr: 5.000000000000001e-05
          entropy: 0.9393448478645748
          entropy_coeff: 0.009999999999999998
          kl: 0.02353617827069693
          policy_loss: 0.03351005791789956
          total_loss: 0.03759327050712374
          vf_explained_var: 0.4066890776157379
          vf_loss: 0.013263022671971056
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,863,36735.2,863000,-2.6468,-2.23,-3.34,264.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-25_02-44-01
  done: false
  episode_len_mean: 265.76
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6575999999999875
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3123
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 1.029799559381273
          entropy_coeff: 0.009999999999999998
          kl: 0.011621714051986328
          policy_loss: 0.045411109344826804
          total_loss: 0.04355948236253526
          vf_explained_var: 0.4873085021972656
          vf_loss: 0.008288136729970575
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,864,36774.1,864000,-2.6576,-2.23,-3.34,265.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-25_02-44-40
  done: false
  episode_len_mean: 266.67
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.666699999999987
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3127
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.9335630661911435
          entropy_coeff: 0.009999999999999998
          kl: 0.013751170112357948
          policy_loss: -0.009040511978997124
          total_loss: -0.004836501843399472
          vf_explained_var: 0.394847571849823
          vf_loss: 0.013352413920478689
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 86500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,865,36813.2,865000,-2.6667,-2.23,-3.34,266.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-25_02-45-19
  done: false
  episode_len_mean: 267.95
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6794999999999867
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3130
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 1.0361262877782187
          entropy_coeff: 0.009999999999999998
          kl: 0.0128904830516145
          policy_loss: 0.04637426816754871
          total_loss: 0.04548118064800898
          vf_explained_var: 0.5650716423988342
          vf_loss: 0.009292664907924417
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,866,36852.2,866000,-2.6795,-2.23,-3.34,267.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-25_02-45-53
  done: false
  episode_len_mean: 268.72
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6871999999999865
  episode_reward_min: -3.229999999999975
  episodes_this_iter: 3
  episodes_total: 3133
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 1.0872152593400743
          entropy_coeff: 0.009999999999999998
          kl: 0.016214580163452456
          policy_loss: -0.040364162706666526
          total_loss: -0.04049410190847185
          vf_explained_var: 0.5266290903091431
          vf_loss: 0.010521440792621838
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 86700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,867,36886.4,867000,-2.6872,-2.23,-3.23,268.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-25_02-46-34
  done: false
  episode_len_mean: 269.72
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6971999999999867
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 3137
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.8128787464565701
          entropy_coeff: 0.009999999999999998
          kl: 0.013375241910604293
          policy_loss: 0.04269183029731115
          total_loss: 0.04480946511030197
          vf_explained_var: 0.39933958649635315
          vf_loss: 0.010064313511571122
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 868000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,868,36926.5,868000,-2.6972,-2.23,-3.26,269.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-25_02-47-17
  done: false
  episode_len_mean: 269.49
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.694899999999986
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 3141
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.6307766603098975
          entropy_coeff: 0.009999999999999998
          kl: 0.007598051068195409
          policy_loss: -0.004407239663932059
          total_loss: 0.0020102184679773117
          vf_explained_var: 0.38936561346054077
          vf_loss: 0.012621773696608014
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,869,36969.7,869000,-2.6949,-2.23,-3.26,269.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-25_02-47-59
  done: false
  episode_len_mean: 269.4
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.693999999999986
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 3144
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.6529905696709951
          entropy_coeff: 0.009999999999999998
          kl: 0.009301606053169091
          policy_loss: -0.04057679408126407
          total_loss: -0.03711165603664186
          vf_explained_var: 0.42539486289024353
          vf_loss: 0.009868398081097338
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 870000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,870,37011.8,870000,-2.694,-2.23,-3.26,269.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-25_02-48-41
  done: false
  episode_len_mean: 268.49
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6848999999999865
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 3148
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.6687707371181912
          entropy_coeff: 0.009999999999999998
          kl: 0.009448707544771414
          policy_loss: 0.0025504105620914037
          total_loss: 0.008229862567451266
          vf_explained_var: 0.38665634393692017
          vf_loss: 0.012238507541931338
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 871

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,871,37053.8,871000,-2.6849,-2.23,-3.26,268.49




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-25_02-49-35
  done: false
  episode_len_mean: 269.75
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.697499999999987
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 3151
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013615530766633572
          cur_lr: 5.000000000000001e-05
          entropy: 0.7283646272288429
          entropy_coeff: 0.009999999999999998
          kl: 0.028631903045045703
          policy_loss: -0.11944730025198724
          total_loss: -0.11253393573893442
          vf_explained_var: 0.3611716330051422
          vf_loss: 0.013807168747815821
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,872,37107.8,872000,-2.6975,-2.23,-3.32,269.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-25_02-50-13
  done: false
  episode_len_mean: 271.58
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7157999999999856
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3155
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020423296149950365
          cur_lr: 5.000000000000001e-05
          entropy: 0.6369463721911113
          entropy_coeff: 0.009999999999999998
          kl: 0.02097041924941529
          policy_loss: 0.013441064208745957
          total_loss: 0.02093342426750395
          vf_explained_var: 0.25072476267814636
          vf_loss: 0.013433535873062081
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,873,37145.5,873000,-2.7158,-2.23,-3.34,271.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-25_02-50-55
  done: false
  episode_len_mean: 271.88
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7187999999999857
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3158
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030634944224925534
          cur_lr: 5.000000000000001e-05
          entropy: 0.6963601191838582
          entropy_coeff: 0.009999999999999998
          kl: 0.013232385516769323
          policy_loss: -0.09878698057598538
          total_loss: -0.09224352811773619
          vf_explained_var: 0.2484026402235031
          vf_loss: 0.013101678755548265
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 87400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,874,37187.3,874000,-2.7188,-2.23,-3.34,271.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-25_02-51-37
  done: false
  episode_len_mean: 272.56
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7255999999999854
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3162
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030634944224925534
          cur_lr: 5.000000000000001e-05
          entropy: 0.7052260273032718
          entropy_coeff: 0.009999999999999998
          kl: 0.012459089894431941
          policy_loss: -0.006434228188461727
          total_loss: -0.0003303072518772549
          vf_explained_var: 0.3010242283344269
          vf_loss: 0.012774495914992358
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,875,37229.7,875000,-2.7256,-2.23,-3.34,272.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-25_02-52-13
  done: false
  episode_len_mean: 273.56
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7355999999999856
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3165
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030634944224925534
          cur_lr: 5.000000000000001e-05
          entropy: 1.0122494790289136
          entropy_coeff: 0.009999999999999998
          kl: 0.012716009594054592
          policy_loss: -0.11505621357096565
          total_loss: -0.11203781142830849
          vf_explained_var: 0.4035107493400574
          vf_loss: 0.012751342676993873
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 87600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,876,37266,876000,-2.7356,-2.23,-3.34,273.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-25_02-52-52
  done: false
  episode_len_mean: 274.99
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.749899999999986
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3169
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030634944224925534
          cur_lr: 5.000000000000001e-05
          entropy: 0.9785753349463145
          entropy_coeff: 0.009999999999999998
          kl: 0.020147404872510162
          policy_loss: 0.06711257191167938
          total_loss: 0.06843074874745475
          vf_explained_var: 0.5012776851654053
          vf_loss: 0.010486716627039844
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,877,37304.2,877000,-2.7499,-2.23,-3.34,274.99


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-25_02-53-34
  done: false
  episode_len_mean: 275.11
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7510999999999854
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3173
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045952416337388315
          cur_lr: 5.000000000000001e-05
          entropy: 0.8345736410882738
          entropy_coeff: 0.009999999999999998
          kl: 0.013430220782872562
          policy_loss: -0.007603584478298823
          total_loss: -0.0036715223143498105
          vf_explained_var: 0.47412529587745667
          vf_loss: 0.011660647071484063
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,878,37346.3,878000,-2.7511,-2.23,-3.34,275.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-25_02-54-13
  done: false
  episode_len_mean: 275.75
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.757499999999985
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 3
  episodes_total: 3176
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045952416337388315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9407481888930003
          entropy_coeff: 0.009999999999999998
          kl: 0.01111733386734828
          policy_loss: 0.014333248883485795
          total_loss: 0.013730093919568592
          vf_explained_var: 0.6137675642967224
          vf_loss: 0.008293459956378986
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 879000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,879,37385.6,879000,-2.7575,-2.23,-3.34,275.75




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-25_02-55-10
  done: false
  episode_len_mean: 277.26
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.772599999999984
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3180
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045952416337388315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9425192124313778
          entropy_coeff: 0.009999999999999998
          kl: 0.013189804246062033
          policy_loss: 0.0017492533557944827
          total_loss: 0.0046355184581544666
          vf_explained_var: 0.5393077731132507
          vf_loss: 0.011705354549404648
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 8800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,880,37442.9,880000,-2.7726,-2.23,-3.34,277.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-25_02-55-47
  done: false
  episode_len_mean: 278.8
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.7879999999999847
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 3
  episodes_total: 3183
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045952416337388315
          cur_lr: 5.000000000000001e-05
          entropy: 1.2152916616863674
          entropy_coeff: 0.009999999999999998
          kl: 0.0214144604222109
          policy_loss: 0.010578313966592152
          total_loss: 0.008719601068231794
          vf_explained_var: 0.6685552000999451
          vf_loss: 0.009310155052096687
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,881,37479.3,881000,-2.788,-2.23,-3.47,278.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-25_02-56-25
  done: false
  episode_len_mean: 279.53
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.795299999999985
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 3
  episodes_total: 3186
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06892862450608246
          cur_lr: 5.000000000000001e-05
          entropy: 0.9283335771825578
          entropy_coeff: 0.009999999999999998
          kl: 0.008119326305911499
          policy_loss: 0.00515656860338317
          total_loss: 0.00494236085149977
          vf_explained_var: 0.6561126112937927
          vf_loss: 0.008509471989236772
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,882,37517.4,882000,-2.7953,-2.23,-3.47,279.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-25_02-57-05
  done: false
  episode_len_mean: 281.45
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.814499999999984
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 3190
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06892862450608246
          cur_lr: 5.000000000000001e-05
          entropy: 1.127194246980879
          entropy_coeff: 0.009999999999999998
          kl: 0.01767472788349459
          policy_loss: -0.020880122979482014
          total_loss: -0.01992065351870325
          vf_explained_var: 0.6497250199317932
          vf_loss: 0.011013116418487496
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,883,37556.9,883000,-2.8145,-2.3,-3.47,281.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-25_02-57-40
  done: false
  episode_len_mean: 283.36
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8335999999999832
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 3193
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06892862450608246
          cur_lr: 5.000000000000001e-05
          entropy: 1.4234373331069947
          entropy_coeff: 0.009999999999999998
          kl: 0.033454627127999875
          policy_loss: 0.043637616394294636
          total_loss: 0.039557029803593956
          vf_explained_var: 0.5942937731742859
          vf_loss: 0.007847804503722324
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,884,37592.6,884000,-2.8336,-2.3,-3.57,283.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-25_02-58-13
  done: false
  episode_len_mean: 285.17
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8516999999999832
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3196
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10339293675912373
          cur_lr: 5.000000000000001e-05
          entropy: 1.3911786529752943
          entropy_coeff: 0.009999999999999998
          kl: 0.017597168487534672
          policy_loss: 0.06254406380984519
          total_loss: 0.059118380480342445
          vf_explained_var: 0.5962870121002197
          vf_loss: 0.008666678488306287
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 885000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,885,37625.8,885000,-2.8517,-2.3,-3.87,285.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-25_02-58-44
  done: false
  episode_len_mean: 287.5
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8749999999999822
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 2
  episodes_total: 3198
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10339293675912373
          cur_lr: 5.000000000000001e-05
          entropy: 1.5179111652904087
          entropy_coeff: 0.009999999999999998
          kl: 0.025674887694272126
          policy_loss: -0.12826114197572072
          total_loss: -0.1320960268378258
          vf_explained_var: 0.3079706132411957
          vf_loss: 0.008689621385807792
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,886,37656,886000,-2.875,-2.3,-4.23,287.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-25_02-59-13
  done: false
  episode_len_mean: 291.78
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9177999999999815
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3201
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 1.618227579858568
          entropy_coeff: 0.009999999999999998
          kl: 0.020298319402745454
          policy_loss: 0.003884908391369714
          total_loss: -0.0013122320175170898
          vf_explained_var: 0.6882441639900208
          vf_loss: 0.00783707721469303
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,887,37685.7,887000,-2.9178,-2.3,-4.67,291.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-25_02-59-49
  done: false
  episode_len_mean: 293.41
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.934099999999982
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3204
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23263410770802836
          cur_lr: 5.000000000000001e-05
          entropy: 1.3677996370527479
          entropy_coeff: 0.009999999999999998
          kl: 0.00759314069111845
          policy_loss: 0.08469234936767155
          total_loss: 0.08070058325926463
          vf_explained_var: 0.6390631198883057
          vf_loss: 0.007919800724549633
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 888000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,888,37721.7,888000,-2.9341,-2.3,-4.67,293.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-25_03-00-24
  done: false
  episode_len_mean: 295.81
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9580999999999813
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3207
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23263410770802836
          cur_lr: 5.000000000000001e-05
          entropy: 1.333530781004164
          entropy_coeff: 0.009999999999999998
          kl: 0.011234609668759927
          policy_loss: 0.06203387611442142
          total_loss: 0.059532441198825836
          vf_explained_var: 0.5810390710830688
          vf_loss: 0.008220318461018098
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 889000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,889,37756,889000,-2.9581,-2.3,-4.67,295.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-25_03-01-16
  done: false
  episode_len_mean: 296.93
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.96929999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3210
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23263410770802836
          cur_lr: 5.000000000000001e-05
          entropy: 1.228201695283254
          entropy_coeff: 0.009999999999999998
          kl: 0.008276039637626317
          policy_loss: 0.06143192946910858
          total_loss: 0.0604420832461781
          vf_explained_var: 0.5333603620529175
          vf_loss: 0.009366881986433226
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,890,37808.6,890000,-2.9693,-2.3,-4.67,296.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-25_03-01-56
  done: false
  episode_len_mean: 298.12
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9811999999999803
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3213
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23263410770802836
          cur_lr: 5.000000000000001e-05
          entropy: 1.1882295423083835
          entropy_coeff: 0.009999999999999998
          kl: 0.0040389875401773325
          policy_loss: -0.06540311815010177
          total_loss: -0.06298930024107298
          vf_explained_var: 0.34125134348869324
          vf_loss: 0.01335650747641921
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,891,37848.3,891000,-2.9812,-2.3,-4.67,298.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-25_03-02-34
  done: false
  episode_len_mean: 299.35
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9934999999999796
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3216
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 1.2141361422008938
          entropy_coeff: 0.009999999999999998
          kl: 0.009456130013637596
          policy_loss: -0.09565351986222798
          total_loss: -0.09616844157377878
          vf_explained_var: 0.4834093451499939
          vf_loss: 0.010526529032116134
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,892,37885.8,892000,-2.9935,-2.3,-4.67,299.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-25_03-03-11
  done: false
  episode_len_mean: 300.3
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.0029999999999797
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3220
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 1.202386932902866
          entropy_coeff: 0.009999999999999998
          kl: 0.011014127019962277
          policy_loss: 0.019857181277539996
          total_loss: 0.02012213683790631
          vf_explained_var: 0.4078024923801422
          vf_loss: 0.011007694227414
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 893000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,893,37923.5,893000,-3.003,-2.3,-4.67,300.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-25_03-03-49
  done: false
  episode_len_mean: 300.18
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.00179999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3223
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 1.0823542303509182
          entropy_coeff: 0.009999999999999998
          kl: 0.006571591634085097
          policy_loss: 0.061681343946192
          total_loss: 0.06102975110212962
          vf_explained_var: 0.49217647314071655
          vf_loss: 0.00940756089403294
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,894,37961.3,894000,-3.0018,-2.3,-4.67,300.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-25_03-04-28
  done: false
  episode_len_mean: 300.43
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.00429999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3226
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.9788093672858345
          entropy_coeff: 0.009999999999999998
          kl: 0.011028499177833516
          policy_loss: -0.10653764555851618
          total_loss: -0.10151808063189188
          vf_explained_var: 0.35504984855651855
          vf_loss: 0.0135248559113178
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,895,37999.6,895000,-3.0043,-2.3,-4.67,300.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-25_03-05-05
  done: false
  episode_len_mean: 300.74
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.00739999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3230
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 1.0557863109641605
          entropy_coeff: 0.009999999999999998
          kl: 0.012895726894570695
          policy_loss: 0.0022171750664711
          total_loss: 0.005747993455992805
          vf_explained_var: 0.4339725375175476
          vf_loss: 0.01258868777917491
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,896,38036.8,896000,-3.0074,-2.3,-4.67,300.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-25_03-05-44
  done: false
  episode_len_mean: 299.85
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9984999999999804
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3233
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.8926315956645542
          entropy_coeff: 0.009999999999999998
          kl: 0.006377456168617633
          policy_loss: -0.06009765581952201
          total_loss: -0.05657923701736662
          vf_explained_var: 0.37203729152679443
          vf_loss: 0.011702930130478408
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,897,38076.2,897000,-2.9985,-2.3,-4.67,299.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-25_03-06-24
  done: false
  episode_len_mean: 300.06
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0005999999999795
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3237
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.9991025030612946
          entropy_coeff: 0.009999999999999998
          kl: 0.005879776533864363
          policy_loss: -0.0016514879133966234
          total_loss: 0.0014849769572416942
          vf_explained_var: 0.46525973081588745
          vf_loss: 0.012443571827477878
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 8980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,898,38116.2,898000,-3.0006,-2.57,-4.67,300.06




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-25_03-07-22
  done: false
  episode_len_mean: 300.53
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0052999999999797
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3240
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.9152220613426633
          entropy_coeff: 0.009999999999999998
          kl: 0.0056457964798087325
          policy_loss: -0.08258644483155675
          total_loss: -0.0791604003144635
          vf_explained_var: 0.4614209830760956
          vf_loss: 0.011921559615681568
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,899,38173.6,899000,-3.0053,-2.57,-4.67,300.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-25_03-08-03
  done: false
  episode_len_mean: 301.23
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.0122999999999793
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3244
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.983884580930074
          entropy_coeff: 0.009999999999999998
          kl: 0.006916202675590139
          policy_loss: 0.01740687953101264
          total_loss: 0.02000745948817995
          vf_explained_var: 0.4999593198299408
          vf_loss: 0.011634952771580881
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,900,38214.7,900000,-3.0123,-2.58,-4.67,301.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-25_03-08-44
  done: false
  episode_len_mean: 301.62
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.0161999999999796
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3248
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.8530727412965563
          entropy_coeff: 0.009999999999999998
          kl: 0.004496996312421686
          policy_loss: -0.008448018588953547
          total_loss: -0.004132656463318401
          vf_explained_var: 0.45662304759025574
          vf_loss: 0.012323007784369918
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 90100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,901,38255.7,901000,-3.0162,-2.58,-4.67,301.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-25_03-09-25
  done: false
  episode_len_mean: 300.84
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.00839999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3251
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.7724610686302185
          entropy_coeff: 0.009999999999999998
          kl: 0.008142741377733288
          policy_loss: -0.004153582702080409
          total_loss: -0.00276956996983952
          vf_explained_var: 0.5059080123901367
          vf_loss: 0.008635054220859376
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,902,38297.1,902000,-3.0084,-2.58,-4.67,300.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-25_03-10-05
  done: false
  episode_len_mean: 299.63
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -2.9962999999999793
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3255
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.8601597362094455
          entropy_coeff: 0.009999999999999998
          kl: 0.006532509264072988
          policy_loss: 0.027804231229755613
          total_loss: 0.031959106276432676
          vf_explained_var: 0.44091373682022095
          vf_loss: 0.012376552789161603
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,903,38337,903000,-2.9963,-2.58,-4.67,299.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-25_03-10-46
  done: false
  episode_len_mean: 299.45
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -2.99449999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3258
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.7321064293384552
          entropy_coeff: 0.009999999999999998
          kl: 0.006523641516748234
          policy_loss: -0.10492958194679684
          total_loss: -0.09944400878416168
          vf_explained_var: 0.38338181376457214
          vf_loss: 0.012427229361815586
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,904,38378.2,904000,-2.9945,-2.58,-4.67,299.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-25_03-11-23
  done: false
  episode_len_mean: 300.88
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.008799999999979
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3262
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.9271054479810926
          entropy_coeff: 0.009999999999999998
          kl: 0.007659293951502851
          policy_loss: 0.04104446118904485
          total_loss: 0.04467813409864903
          vf_explained_var: 0.37735989689826965
          vf_loss: 0.012459272684322464
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,905,38415.2,905000,-3.0088,-2.59,-4.67,300.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-25_03-12-06
  done: false
  episode_len_mean: 300.34
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.003399999999979
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3265
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.7527855813503266
          entropy_coeff: 0.009999999999999998
          kl: 0.00985444441586961
          policy_loss: -0.1082276756564776
          total_loss: -0.10341603689723544
          vf_explained_var: 0.3867761492729187
          vf_loss: 0.011766376780966918
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,906,38458.1,906000,-3.0034,-2.59,-4.67,300.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-25_03-12-44
  done: false
  episode_len_mean: 299.96
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -2.999599999999979
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3269
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.9146077725622389
          entropy_coeff: 0.009999999999999998
          kl: 0.007666339962196389
          policy_loss: 0.06262447171741062
          total_loss: 0.06481022445691956
          vf_explained_var: 0.44313159584999084
          vf_loss: 0.010885968902665707
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,907,38496,907000,-2.9996,-2.59,-4.67,299.96




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-25_03-13-45
  done: false
  episode_len_mean: 299.97
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.99969999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3273
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.9294880741172367
          entropy_coeff: 0.009999999999999998
          kl: 0.01475114632162402
          policy_loss: -0.010660512952340974
          total_loss: -0.006775152103768454
          vf_explained_var: 0.43782493472099304
          vf_loss: 0.012322336952719424
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,908,38556.8,908000,-2.9997,-2.44,-4.67,299.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-25_03-14-23
  done: false
  episode_len_mean: 300.03
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0002999999999798
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3276
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.9190047482649485
          entropy_coeff: 0.009999999999999998
          kl: 0.009250774194861006
          policy_loss: 0.017994198865360685
          total_loss: 0.01806392777297232
          vf_explained_var: 0.5531218647956848
          vf_loss: 0.00872176194227197
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 909000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,909,38595,909000,-3.0003,-2.44,-4.67,300.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-25_03-15-03
  done: false
  episode_len_mean: 299.9
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9989999999999792
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3280
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.1139154526922437
          entropy_coeff: 0.009999999999999998
          kl: 0.013527954181090568
          policy_loss: -0.015043658018112183
          total_loss: -0.01321862323416604
          vf_explained_var: 0.5108537673950195
          vf_loss: 0.012177420117788844
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,910,38634.4,910000,-2.999,-2.44,-4.67,299.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-25_03-15-40
  done: false
  episode_len_mean: 299.46
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9945999999999797
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3283
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.0276653289794921
          entropy_coeff: 0.009999999999999998
          kl: 0.008221648491807008
          policy_loss: 0.0848412166039149
          total_loss: 0.08396101022760073
          vf_explained_var: 0.5309592485427856
          vf_loss: 0.008918283051914638
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,911,38672,911000,-2.9946,-2.44,-4.67,299.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-25_03-16-21
  done: false
  episode_len_mean: 299.3
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.992999999999981
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3287
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.0334226734108396
          entropy_coeff: 0.009999999999999998
          kl: 0.008352887243546882
          policy_loss: 0.0021545319507519406
          total_loss: 0.004245349226726426
          vf_explained_var: 0.5257592797279358
          vf_loss: 0.01193925224037634
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 912000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,912,38713,912000,-2.993,-2.44,-4.67,299.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-25_03-17-01
  done: false
  episode_len_mean: 298.79
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.98789999999998
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3290
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.085982890923818
          entropy_coeff: 0.009999999999999998
          kl: 0.008698266402087759
          policy_loss: 0.0527740478515625
          total_loss: 0.05091484205590354
          vf_explained_var: 0.6530654430389404
          vf_loss: 0.008494742749543446
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 913000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,913,38752.7,913000,-2.9879,-2.44,-4.67,298.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-25_03-17-37
  done: false
  episode_len_mean: 298.93
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9892999999999796
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 3
  episodes_total: 3293
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.1736697958575355
          entropy_coeff: 0.009999999999999998
          kl: 0.020056214361735256
          policy_loss: 0.018703301540679403
          total_loss: 0.018725012698107297
          vf_explained_var: 0.5534517168998718
          vf_loss: 0.010591969242604036
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,914,38788.8,914000,-2.9893,-2.44,-4.67,298.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-25_03-18-18
  done: false
  episode_len_mean: 296.1
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9609999999999808
  episode_reward_min: -4.669999999999945
  episodes_this_iter: 4
  episodes_total: 3297
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0410903632640838
          entropy_coeff: 0.009999999999999998
          kl: 0.00906656861874894
          policy_loss: -0.0242704339325428
          total_loss: -0.021232443302869795
          vf_explained_var: 0.440615713596344
          vf_loss: 0.012657946803503566
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,915,38829.8,915000,-2.961,-2.44,-4.67,296.1




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-25_03-19-14
  done: false
  episode_len_mean: 293.05
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9304999999999812
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 3
  episodes_total: 3300
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.1969259526994493
          entropy_coeff: 0.009999999999999998
          kl: 0.017807438023576358
          policy_loss: 0.011176317350731955
          total_loss: 0.009594387395514383
          vf_explained_var: 0.5389840006828308
          vf_loss: 0.008833847536394993
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,916,38885.2,916000,-2.9305,-2.44,-3.71,293.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-25_03-19-56
  done: false
  episode_len_mean: 291.15
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.911499999999981
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 3304
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0596368908882141
          entropy_coeff: 0.009999999999999998
          kl: 0.012245604686724141
          policy_loss: -0.01505304194158978
          total_loss: -0.012468279235892825
          vf_explained_var: 0.4397253692150116
          vf_loss: 0.01211285088211298
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,917,38927.4,917000,-2.9115,-2.44,-3.61,291.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-25_03-20-32
  done: false
  episode_len_mean: 289.54
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.895399999999982
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 3307
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.2477395859029559
          entropy_coeff: 0.009999999999999998
          kl: 0.012644520424582278
          policy_loss: 0.05853471242719226
          total_loss: 0.05566775914695528
          vf_explained_var: 0.5640100836753845
          vf_loss: 0.008507362814594268
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,918,38963.6,918000,-2.8954,-2.44,-3.37,289.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-25_03-21-12
  done: false
  episode_len_mean: 289.1
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.8909999999999827
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 3310
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.127025889025794
          entropy_coeff: 0.009999999999999998
          kl: 0.009046230583003063
          policy_loss: -0.08817597710424
          total_loss: -0.08707804994450676
          vf_explained_var: 0.5063793063163757
          vf_loss: 0.011579012632783917
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 919000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,919,39003,919000,-2.891,-2.44,-3.32,289.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-25_03-21-50
  done: false
  episode_len_mean: 288.02
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.880199999999982
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 3314
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.194379789299435
          entropy_coeff: 0.009999999999999998
          kl: 0.011028026794423182
          policy_loss: 0.023310193171103794
          total_loss: 0.021208696812391282
          vf_explained_var: 0.5939921736717224
          vf_loss: 0.008880237223477
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,920,39041.1,920000,-2.8802,-2.44,-3.32,288.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-25_03-22-29
  done: false
  episode_len_mean: 287.65
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.8764999999999823
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 3317
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0535207311312358
          entropy_coeff: 0.009999999999999998
          kl: 0.006758752536815235
          policy_loss: 0.05571793541312218
          total_loss: 0.05524557961357964
          vf_explained_var: 0.4741305112838745
          vf_loss: 0.009473232273012399
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,921,39080.2,921000,-2.8765,-2.44,-3.32,287.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-25_03-23-09
  done: false
  episode_len_mean: 287.23
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.8722999999999828
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 3321
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.3121293187141418
          entropy_coeff: 0.009999999999999998
          kl: 0.014063096005434438
          policy_loss: -0.018846520947085485
          total_loss: -0.018579573763741388
          vf_explained_var: 0.5499112606048584
          vf_loss: 0.012161407423102193
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,922,39120.6,922000,-2.8723,-2.44,-3.32,287.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-25_03-23-47
  done: false
  episode_len_mean: 286.92
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.869199999999982
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 3324
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.3328807261255053
          entropy_coeff: 0.009999999999999998
          kl: 0.009431575822467488
          policy_loss: 0.05164452609088686
          total_loss: 0.047584705303112665
          vf_explained_var: 0.6403972506523132
          vf_loss: 0.008446194984329244
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,923,39158.2,923000,-2.8692,-2.44,-3.32,286.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-25_03-24-21
  done: false
  episode_len_mean: 288.43
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.8842999999999823
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3327
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.4751037491692438
          entropy_coeff: 0.009999999999999998
          kl: 0.015513883982949237
          policy_loss: 0.04969163727429178
          total_loss: 0.04573756299085087
          vf_explained_var: 0.4902428388595581
          vf_loss: 0.009443566653256614
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 924000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,924,39192.4,924000,-2.8843,-2.44,-3.73,288.43




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-25_03-25-18
  done: false
  episode_len_mean: 288.48
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.884799999999982
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3330
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.2919293231434292
          entropy_coeff: 0.009999999999999998
          kl: 0.01035531607424155
          policy_loss: -0.1035817434390386
          total_loss: -0.10432606819603178
          vf_explained_var: 0.5925432443618774
          vf_loss: 0.01127158918728431
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,925,39248.8,925000,-2.8848,-2.44,-3.73,288.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-25_03-25-58
  done: false
  episode_len_mean: 288.79
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.887899999999982
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3334
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.4985479990641275
          entropy_coeff: 0.009999999999999998
          kl: 0.01783731675637351
          policy_loss: -0.047570201050904065
          total_loss: -0.0509920935663912
          vf_explained_var: 0.7084134817123413
          vf_loss: 0.010007498630632957
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,926,39289.4,926000,-2.8879,-2.44,-3.73,288.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-25_03-26-34
  done: false
  episode_len_mean: 289.31
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.893099999999982
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3337
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.513219330045912
          entropy_coeff: 0.009999999999999998
          kl: 0.03564363698338025
          policy_loss: 0.029385937253634135
          total_loss: 0.027874720841646196
          vf_explained_var: 0.5758513808250427
          vf_loss: 0.010511504905298352
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,927,39325,927000,-2.8931,-2.44,-3.73,289.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-25_03-27-13
  done: false
  episode_len_mean: 290.26
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.902599999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3340
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.4586988435851203
          entropy_coeff: 0.009999999999999998
          kl: 0.01027188120149647
          policy_loss: 0.01336312914888064
          total_loss: 0.0075427765647570295
          vf_explained_var: 0.7452864050865173
          vf_loss: 0.00742249035069512
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,928,39364.3,928000,-2.9026,-2.44,-3.73,290.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-25_03-27-52
  done: false
  episode_len_mean: 290.64
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.906399999999982
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3344
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.4295594255129496
          entropy_coeff: 0.009999999999999998
          kl: 0.010445831392428318
          policy_loss: -0.030867539015081193
          total_loss: -0.03401987461580171
          vf_explained_var: 0.700534462928772
          vf_loss: 0.009776350628170703
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,929,39403,929000,-2.9064,-2.44,-3.73,290.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-25_03-28-30
  done: false
  episode_len_mean: 291.49
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9148999999999825
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3347
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.6399348364935982
          entropy_coeff: 0.009999999999999998
          kl: 0.01081010944526501
          policy_loss: 0.013197665744357639
          total_loss: 0.004734959701697032
          vf_explained_var: 0.7968825697898865
          vf_loss: 0.006522067571576271
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,930,39440.9,930000,-2.9149,-2.44,-3.73,291.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-25_03-29-09
  done: false
  episode_len_mean: 292.58
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.925799999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3350
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.428539592689938
          entropy_coeff: 0.009999999999999998
          kl: 0.009723441804325973
          policy_loss: 0.046647651079628205
          total_loss: 0.040800975097550284
          vf_explained_var: 0.7991327047348022
          vf_loss: 0.007166345833360942
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,931,39479.5,931000,-2.9258,-2.44,-3.73,292.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-25_03-29-47
  done: false
  episode_len_mean: 293.71
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9370999999999814
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3353
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.3764278040991889
          entropy_coeff: 0.009999999999999998
          kl: 0.015299116903854661
          policy_loss: -0.060926678942309485
          total_loss: -0.06326105263498094
          vf_explained_var: 0.7216755151748657
          vf_loss: 0.00942791011184454
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,932,39518.2,932000,-2.9371,-2.44,-3.73,293.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-10-25_03-30-27
  done: false
  episode_len_mean: 295.45
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9544999999999817
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3357
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.777334796057807
          entropy_coeff: 0.009999999999999998
          kl: 0.015368689315305122
          policy_loss: -0.02750244703557756
          total_loss: -0.03597468203968472
          vf_explained_var: 0.8254970908164978
          vf_loss: 0.007290015467959973
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 933000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,933,39558.2,933000,-2.9545,-2.44,-3.73,295.45




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-10-25_03-31-24
  done: false
  episode_len_mean: 296.37
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9636999999999807
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3360
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 2.2105903214878504
          entropy_coeff: 0.009999999999999998
          kl: 0.026453971000973672
          policy_loss: -0.03366140541103151
          total_loss: -0.04871232178476122
          vf_explained_var: 0.8299353122711182
          vf_loss: 0.003593306921862273
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 934000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,934,39614.9,934000,-2.9637,-2.44,-3.73,296.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-10-25_03-32-06
  done: false
  episode_len_mean: 295.84
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9583999999999806
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3363
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 0.9834802256690132
          entropy_coeff: 0.009999999999999998
          kl: 0.013448851744850682
          policy_loss: 0.025315432167715497
          total_loss: 0.02447769848836793
          vf_explained_var: 0.8425660133361816
          vf_loss: 0.00635726146089534
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 935000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,935,39656.9,935000,-2.9584,-2.44,-3.73,295.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-10-25_03-32-45
  done: false
  episode_len_mean: 297.09
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.970899999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3367
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3319008708000184
          entropy_coeff: 0.009999999999999998
          kl: 0.009935769378990322
          policy_loss: -0.03794657381044494
          total_loss: -0.04250224439634217
          vf_explained_var: 0.8569183349609375
          vf_loss: 0.006813098355713818
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 936000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,936,39696.1,936000,-2.9709,-2.44,-3.73,297.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-10-25_03-33-25
  done: false
  episode_len_mean: 297.48
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -2.974799999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3370
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3998792489369711
          entropy_coeff: 0.009999999999999998
          kl: 0.027065221826020135
          policy_loss: 0.02467609246571859
          total_loss: 0.020850162787569893
          vf_explained_var: 0.8693707585334778
          vf_loss: 0.0048603609055539385
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 937000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,937,39735.5,937000,-2.9748,-2.51,-3.73,297.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-10-25_03-34-06
  done: false
  episode_len_mean: 297.28
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -2.97279999999998
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3374
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.9070416046513452
          entropy_coeff: 0.009999999999999998
          kl: 0.008931897728470754
          policy_loss: -0.011760246919261084
          total_loss: -0.01122013239396943
          vf_explained_var: 0.7518450617790222
          vf_loss: 0.006980732217844989
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 938000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,938,39776.4,938000,-2.9728,-2.51,-3.73,297.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-10-25_03-34-49
  done: false
  episode_len_mean: 296.77
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -2.9676999999999807
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3377
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.8885199354754554
          entropy_coeff: 0.009999999999999998
          kl: 0.009307255880900792
          policy_loss: -0.092838207549519
          total_loss: -0.09094413328501913
          vf_explained_var: 0.6023398637771606
          vf_loss: 0.008038958870909281
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 939000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,939,39819.4,939000,-2.9677,-2.51,-3.73,296.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-10-25_03-35-32
  done: false
  episode_len_mean: 295.43
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.9542999999999804
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3381
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.8308694740136464
          entropy_coeff: 0.009999999999999998
          kl: 0.006862480591381923
          policy_loss: -0.05133533221152094
          total_loss: -0.048023603277073966
          vf_explained_var: 0.4993087649345398
          vf_loss: 0.009599925846689276
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained: 940000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,940,39863.2,940000,-2.9543,-2.34,-3.73,295.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-10-25_03-36-14
  done: false
  episode_len_mean: 294.58
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.945799999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3385
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.868542308277554
          entropy_coeff: 0.009999999999999998
          kl: 0.006249005145451826
          policy_loss: 0.03379501071241167
          total_loss: 0.0357077999247445
          vf_explained_var: 0.5022749304771423
          vf_loss: 0.008758328715339303
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,941,39904.4,941000,-2.9458,-2.34,-3.73,294.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-10-25_03-36-57
  done: false
  episode_len_mean: 293.67
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.936699999999981
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3389
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.7485991974671682
          entropy_coeff: 0.009999999999999998
          kl: 0.003738392206857731
          policy_loss: 0.02361436014374097
          total_loss: 0.026114317857556874
          vf_explained_var: 0.46421897411346436
          vf_loss: 0.008885260567896895
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 942000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,942,39947.4,942000,-2.9367,-2.34,-3.73,293.67




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-10-25_03-38-01
  done: false
  episode_len_mean: 290.9
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.9089999999999816
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3393
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.7297386613157061
          entropy_coeff: 0.009999999999999998
          kl: 0.005916802306716286
          policy_loss: 0.005781464195913738
          total_loss: 0.009832574841048982
          vf_explained_var: 0.35920336842536926
          vf_loss: 0.010477462592017319
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,943,40011.6,943000,-2.909,-2.31,-3.73,290.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-10-25_03-38-46
  done: false
  episode_len_mean: 290.05
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.9004999999999814
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3397
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 1.0129796266555786
          entropy_coeff: 0.009999999999999998
          kl: 0.028214896880235093
          policy_loss: -0.01756636450688044
          total_loss: -0.011656379865275488
          vf_explained_var: 0.45592913031578064
          vf_loss: 0.011886160080838535
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 9440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,944,40056.1,944000,-2.9005,-2.31,-3.73,290.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-10-25_03-39-29
  done: false
  episode_len_mean: 288.73
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.887299999999982
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3401
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.7450792776213752
          entropy_coeff: 0.009999999999999998
          kl: 0.010041570463477553
          policy_loss: -0.01370480213728216
          total_loss: -0.008998058612147967
          vf_explained_var: 0.4667014181613922
          vf_loss: 0.009940150294763346
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,945,40099.4,945000,-2.8873,-2.29,-3.73,288.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-10-25_03-40-10
  done: false
  episode_len_mean: 288.0
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8799999999999817
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 3404
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.8650512622462379
          entropy_coeff: 0.009999999999999998
          kl: 0.014507390014976642
          policy_loss: -0.012618256608645122
          total_loss: -0.01247717547747824
          vf_explained_var: 0.7879032492637634
          vf_loss: 0.005588063533003959
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 946000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,946,40140.1,946000,-2.88,-2.29,-3.73,288


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-10-25_03-40-52
  done: false
  episode_len_mean: 287.07
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.870699999999983
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3408
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.7674772673183017
          entropy_coeff: 0.009999999999999998
          kl: 0.01516556182257441
          policy_loss: -0.012200387939810754
          total_loss: -0.006867990063296424
          vf_explained_var: 0.6258445978164673
          vf_loss: 0.009658301833810078
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 947000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,947,40182.5,947000,-2.8707,-2.29,-3.73,287.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-10-25_03-41-36
  done: false
  episode_len_mean: 285.48
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8547999999999827
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3412
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.5697565125094519
          entropy_coeff: 0.009999999999999998
          kl: 0.01024185765512117
          policy_loss: 0.031041764054033492
          total_loss: 0.03625061565803157
          vf_explained_var: 0.556211531162262
          vf_loss: 0.008644802314746711
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 948000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,948,40226.5,948000,-2.8548,-2.29,-3.73,285.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-10-25_03-42-20
  done: false
  episode_len_mean: 284.3
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8429999999999835
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3416
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.6907630642255147
          entropy_coeff: 0.009999999999999998
          kl: 0.01452568003298218
          policy_loss: 0.02921398596631156
          total_loss: 0.034436898844109644
          vf_explained_var: 0.49208885431289673
          vf_loss: 0.008922974392771722
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 949000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,949,40270.4,949000,-2.843,-2.29,-3.73,284.3




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-10-25_03-43-24
  done: false
  episode_len_mean: 281.88
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8187999999999835
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3420
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.5836855706241396
          entropy_coeff: 0.009999999999999998
          kl: 0.009627709986392022
          policy_loss: 0.040714490910371146
          total_loss: 0.046276035077042046
          vf_explained_var: 0.3397727906703949
          vf_loss: 0.009272405919101502
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 950000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,950,40334.7,950000,-2.8188,-2.24,-3.73,281.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-10-25_03-44-15
  done: false
  episode_len_mean: 279.8
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.797999999999984
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 3424
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.5873885598447588
          entropy_coeff: 0.009999999999999998
          kl: 0.006246558350872671
          policy_loss: -0.06759608305162854
          total_loss: -0.05605304862062136
          vf_explained_var: 0.04678456485271454
          vf_loss: 0.01603754935786128
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 951000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,951,40385.3,951000,-2.798,-2.24,-3.73,279.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-10-25_03-45-01
  done: false
  episode_len_mean: 275.87
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7586999999999846
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3428
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.625475001335144
          entropy_coeff: 0.009999999999999998
          kl: 0.005107342835018455
          policy_loss: -0.11571751766734653
          total_loss: -0.10692285845677058
          vf_explained_var: 0.25244128704071045
          vf_loss: 0.013921603177570635
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 952000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,952,40431.3,952000,-2.7587,-2.24,-3.51,275.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-10-25_03-45-47
  done: false
  episode_len_mean: 272.95
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.729499999999985
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 5
  episodes_total: 3433
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22082065692598007
          cur_lr: 5.000000000000001e-05
          entropy: 0.5576638450225194
          entropy_coeff: 0.009999999999999998
          kl: 0.004725580851782417
          policy_loss: -0.010217458837562136
          total_loss: 0.0008402549558215671
          vf_explained_var: 0.11902409791946411
          vf_loss: 0.015590848411536878
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,953,40477.4,953000,-2.7295,-2.24,-3.51,272.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-10-25_03-46-34
  done: false
  episode_len_mean: 270.26
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7025999999999857
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3437
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11041032846299004
          cur_lr: 5.000000000000001e-05
          entropy: 0.4793330434295866
          entropy_coeff: 0.009999999999999998
          kl: 0.003603244713274931
          policy_loss: 0.036299829185009
          total_loss: 0.043479800058735744
          vf_explained_var: 0.07057095319032669
          vf_loss: 0.01157546474908789
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 954000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,954,40524.4,954000,-2.7026,-2.24,-3.51,270.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-10-25_03-47-21
  done: false
  episode_len_mean: 267.16
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6715999999999873
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3441
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.5211444314983156
          entropy_coeff: 0.009999999999999998
          kl: 0.005219115444675755
          policy_loss: -0.004254010816415151
          total_loss: 0.0037506533993615044
          vf_explained_var: 0.09432327747344971
          vf_loss: 0.012927983276959923
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 95500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,955,40571.3,955000,-2.6716,-2.24,-3.51,267.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-10-25_03-48-02
  done: false
  episode_len_mean: 265.98
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6597999999999873
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3445
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.9809252195888095
          entropy_coeff: 0.009999999999999998
          kl: 0.023733194123804133
          policy_loss: 0.0009715743362903595
          total_loss: 0.004908094389571084
          vf_explained_var: 0.3476428687572479
          vf_loss: 0.012435575688464774
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 956000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,956,40612.1,956000,-2.6598,-2.24,-3.51,265.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-10-25_03-48-45
  done: false
  episode_len_mean: 264.74
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6473999999999878
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3449
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.9640637821621365
          entropy_coeff: 0.009999999999999998
          kl: 0.010622509865659425
          policy_loss: 0.0011505561984247632
          total_loss: 0.004362459894683626
          vf_explained_var: 0.44724562764167786
          vf_loss: 0.011972917109313938
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 957000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,957,40655.6,957000,-2.6474,-2.24,-3.51,264.74




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-10-25_03-49-48
  done: false
  episode_len_mean: 263.88
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.638799999999987
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 3
  episodes_total: 3452
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 1.2241414030392965
          entropy_coeff: 0.009999999999999998
          kl: 0.012635660894491944
          policy_loss: -0.015268747591310076
          total_loss: -0.01821603626012802
          vf_explained_var: 0.6423546075820923
          vf_loss: 0.008247792813926935
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 958000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,958,40718.2,958000,-2.6388,-2.24,-3.51,263.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-10-25_03-50-33
  done: false
  episode_len_mean: 261.0
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.609999999999988
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3456
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.7634830468230778
          entropy_coeff: 0.009999999999999998
          kl: 0.0057700002131931705
          policy_loss: -0.03825739175081253
          total_loss: -0.032523769719733135
          vf_explained_var: 0.4390832781791687
          vf_loss: 0.012890651977310577
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained: 959000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,959,40763.1,959000,-2.61,-2.24,-3.51,261


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-10-25_03-51-18
  done: false
  episode_len_mean: 258.79
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5878999999999888
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 3460
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 1.0553667498959436
          entropy_coeff: 0.009999999999999998
          kl: 0.012282434131506608
          policy_loss: -0.056775141672955616
          total_loss: -0.05410825237631798
          vf_explained_var: 0.5183209180831909
          vf_loss: 0.012203474818832345
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 960000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,960,40807.8,960000,-2.5879,-2.24,-3.36,258.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-10-25_03-52-00
  done: false
  episode_len_mean: 257.73
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5772999999999895
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 3464
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 1.0233016692929797
          entropy_coeff: 0.009999999999999998
          kl: 0.03454155497914781
          policy_loss: -0.0032066718571715884
          total_loss: -0.0002086675415436427
          vf_explained_var: 0.5193621516227722
          vf_loss: 0.010370710756008823
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_trained: 9610

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,961,40850.4,961000,-2.5773,-2.24,-3.36,257.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-10-25_03-52-44
  done: false
  episode_len_mean: 255.22
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.55219999999999
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 3468
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.8555737018585206
          entropy_coeff: 0.009999999999999998
          kl: 0.015217289707750738
          policy_loss: -0.044280942529439926
          total_loss: -0.040770809435182145
          vf_explained_var: 0.49980655312538147
          vf_loss: 0.010175711868537798
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 962000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,962,40893.9,962000,-2.5522,-2.24,-2.93,255.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-10-25_03-53-32
  done: false
  episode_len_mean: 253.58
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.53579999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3472
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.6663788325256772
          entropy_coeff: 0.009999999999999998
          kl: 0.019658218198987103
          policy_loss: -0.10783138918793864
          total_loss: -0.10062722019437287
          vf_explained_var: 0.36340945959091187
          vf_loss: 0.011426181128869454
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 963000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,963,40942,963000,-2.5358,-2.24,-2.85,253.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-10-25_03-54-19
  done: false
  episode_len_mean: 251.83
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.51829999999999
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 3477
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.6454257124000126
          entropy_coeff: 0.009999999999999998
          kl: 0.016858805150240463
          policy_loss: -0.020430107414722443
          total_loss: -0.012536812490887112
          vf_explained_var: 0.2993796169757843
          vf_loss: 0.012253493132690588
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 964000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,964,40989.1,964000,-2.5183,-2.24,-2.85,251.83




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-10-25_03-55-26
  done: false
  episode_len_mean: 250.62
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.5061999999999904
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3481
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.5796202424499723
          entropy_coeff: 0.009999999999999998
          kl: 0.004670236518078575
          policy_loss: 0.02177369048198064
          total_loss: 0.02717896575729052
          vf_explained_var: 0.20201055705547333
          vf_loss: 0.010621379584901863
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained: 965000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,965,41055.5,965000,-2.5062,-2.06,-2.85,250.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-10-25_03-56-12
  done: false
  episode_len_mean: 249.22
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4921999999999906
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3485
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.062105809760431896
          cur_lr: 5.000000000000001e-05
          entropy: 0.531049609515402
          entropy_coeff: 0.009999999999999998
          kl: 0.005832177053101617
          policy_loss: -0.004040302087863286
          total_loss: 0.002582622898949517
          vf_explained_var: 0.09124477207660675
          vf_loss: 0.011571206700884634
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 966000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,966,41102.1,966000,-2.4922,-2.06,-2.85,249.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-10-25_03-56-58
  done: false
  episode_len_mean: 248.26
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.482599999999991
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 3490
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.062105809760431896
          cur_lr: 5.000000000000001e-05
          entropy: 0.5358678443564309
          entropy_coeff: 0.009999999999999998
          kl: 0.004561354990081491
          policy_loss: -0.03154153509272469
          total_loss: -0.021725688378016153
          vf_explained_var: 0.1371106654405594
          vf_loss: 0.014891234847406546
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained: 967000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,967,41148.2,967000,-2.4826,-2.06,-2.85,248.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-10-25_03-57-44
  done: false
  episode_len_mean: 247.98
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4797999999999907
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3494
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.031052904880215948
          cur_lr: 5.000000000000001e-05
          entropy: 0.9095485733615027
          entropy_coeff: 0.009999999999999998
          kl: 0.044440232052012696
          policy_loss: 0.0001206736597749922
          total_loss: 0.0023494691484504276
          vf_explained_var: 0.45634618401527405
          vf_loss: 0.009944285240231289
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 968000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,968,41193.4,968000,-2.4798,-2.06,-2.85,247.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-10-25_03-58-25
  done: false
  episode_len_mean: 247.34
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.473399999999991
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3498
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04657935732032392
          cur_lr: 5.000000000000001e-05
          entropy: 0.8513111121124691
          entropy_coeff: 0.009999999999999998
          kl: 0.02642936367600039
          policy_loss: -0.02259221822023392
          total_loss: -0.021903293662601048
          vf_explained_var: 0.6088773608207703
          vf_loss: 0.007970973709598183
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 969000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,969,41234.9,969000,-2.4734,-2.06,-2.85,247.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-10-25_03-59-11
  done: false
  episode_len_mean: 247.04
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.470399999999991
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 3502
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06986903598048588
          cur_lr: 5.000000000000001e-05
          entropy: 0.8548175017038981
          entropy_coeff: 0.009999999999999998
          kl: 0.013657696834810749
          policy_loss: 0.03434904050081968
          total_loss: 0.03243409829835097
          vf_explained_var: 0.6467956304550171
          vf_loss: 0.005678981627958516
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 970000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,970,41280.8,970000,-2.4704,-2.06,-2.85,247.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-10-25_03-59-55
  done: false
  episode_len_mean: 246.3
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.462999999999991
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3506
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06986903598048588
          cur_lr: 5.000000000000001e-05
          entropy: 1.0152798407607608
          entropy_coeff: 0.009999999999999998
          kl: 0.07029350147550623
          policy_loss: 0.019992895589934456
          total_loss: 0.020228269199530285
          vf_explained_var: 0.5875418782234192
          vf_loss: 0.005476833396177325
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 971000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,971,41325.2,971000,-2.463,-2.06,-2.84,246.3




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-10-25_04-01-02
  done: false
  episode_len_mean: 244.83
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4482999999999913
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3510
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10480355397072884
          cur_lr: 5.000000000000001e-05
          entropy: 0.5685798701312806
          entropy_coeff: 0.009999999999999998
          kl: 0.039496969347159294
          policy_loss: 0.041904607084062365
          total_loss: 0.04599364565478431
          vf_explained_var: 0.5083469748497009
          vf_loss: 0.005635410037616061
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 972000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,972,41391.9,972000,-2.4483,-2.04,-2.84,244.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-10-25_04-01-52
  done: false
  episode_len_mean: 244.2
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4419999999999917
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3514
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.3871895796722836
          entropy_coeff: 0.009999999999999998
          kl: 0.008780455287407346
          policy_loss: -0.008558546668953365
          total_loss: -0.0047705969876713225
          vf_explained_var: 0.581568717956543
          vf_loss: 0.006279511987749073
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 97300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,973,41441.3,973000,-2.442,-2.04,-2.84,244.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-10-25_04-02-36
  done: false
  episode_len_mean: 244.49
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4448999999999916
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3518
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.3943863534265094
          entropy_coeff: 0.009999999999999998
          kl: 0.0034878444684851373
          policy_loss: 0.06225027963519096
          total_loss: 0.06791180678539806
          vf_explained_var: 0.4209855794906616
          vf_loss: 0.00905708150110311
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,974,41485.3,974000,-2.4449,-2.04,-2.84,244.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-10-25_04-03-22
  done: false
  episode_len_mean: 245.02
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4501999999999917
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3522
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.4253040270672904
          entropy_coeff: 0.009999999999999998
          kl: 0.006876915079812167
          policy_loss: -0.005317419601811303
          total_loss: -0.00021792476375897724
          vf_explained_var: 0.286275178194046
          vf_loss: 0.00881199057524403
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 9750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,975,41532.1,975000,-2.4502,-2.04,-2.84,245.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-10-25_04-04-07
  done: false
  episode_len_mean: 245.86
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4585999999999912
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3526
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.4143695573012034
          entropy_coeff: 0.009999999999999998
          kl: 0.0071541237265600605
          policy_loss: -0.0023239449908336
          total_loss: 0.0066652890294790264
          vf_explained_var: 0.04137563705444336
          vf_loss: 0.012570598079926438
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 9760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,976,41576.8,976000,-2.4586,-2.04,-2.84,245.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-10-25_04-04-49
  done: false
  episode_len_mean: 245.73
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.457299999999991
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 3530
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.5948704858620961
          entropy_coeff: 0.009999999999999998
          kl: 0.021803511545860646
          policy_loss: -0.02218346359829108
          total_loss: -0.015500204803215133
          vf_explained_var: 0.18180625140666962
          vf_loss: 0.010918147696389092
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 97700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,977,41618.6,977000,-2.4573,-2.04,-2.84,245.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-10-25_04-05-28
  done: false
  episode_len_mean: 248.19
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4818999999999907
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3534
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11790399821706997
          cur_lr: 5.000000000000001e-05
          entropy: 0.41586541367901697
          entropy_coeff: 0.009999999999999998
          kl: 0.004634251513039247
          policy_loss: -0.014385210971037546
          total_loss: -0.005810770847731166
          vf_explained_var: 0.07093735039234161
          vf_loss: 0.012186698532766766
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained: 97

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,978,41657.3,978000,-2.4819,-2.04,-3.41,248.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-10-25_04-06-13
  done: false
  episode_len_mean: 248.66
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4865999999999904
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3538
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.3392801993423038
          entropy_coeff: 0.009999999999999998
          kl: 0.014011045757817047
          policy_loss: 0.013651901152398851
          total_loss: 0.023055990288654963
          vf_explained_var: 0.03018764592707157
          vf_loss: 0.011970915055523315
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 9790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,979,41702.3,979000,-2.4866,-2.04,-3.41,248.66




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-10-25_04-07-17
  done: false
  episode_len_mean: 248.86
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.488599999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3542
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.5323279241720835
          entropy_coeff: 0.009999999999999998
          kl: 0.056210856769227845
          policy_loss: -0.0017236784100532533
          total_loss: 0.0067963584429687925
          vf_explained_var: 0.20953315496444702
          vf_loss: 0.010529575874615046
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 98

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,980,41766.7,980000,-2.4886,-2.04,-3.41,248.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-10-25_04-08-00
  done: false
  episode_len_mean: 248.19
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4818999999999907
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3546
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.7719758007261488
          entropy_coeff: 0.009999999999999998
          kl: 0.005917015748385602
          policy_loss: 0.01937662528620826
          total_loss: 0.024851443701320225
          vf_explained_var: 0.04474637284874916
          vf_loss: 0.012671344758321842
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,981,41809.8,981000,-2.4819,-2.04,-3.41,248.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-10-25_04-08-41
  done: false
  episode_len_mean: 248.36
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4835999999999903
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 3549
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.7079919377962748
          entropy_coeff: 0.009999999999999998
          kl: 0.01232866479297703
          policy_loss: -0.038577927069531545
          total_loss: -0.038387078709072535
          vf_explained_var: 0.6254318952560425
          vf_loss: 0.006180569245609351
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 98200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,982,41851,982000,-2.4836,-2.04,-3.41,248.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-10-25_04-09-27
  done: false
  episode_len_mean: 247.6
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4759999999999907
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3553
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.5088422305054134
          entropy_coeff: 0.009999999999999998
          kl: 0.0057525675814954665
          policy_loss: 0.005021849109066857
          total_loss: 0.015029170529709921
          vf_explained_var: 0.07376697659492493
          vf_loss: 0.014587059016856882
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained: 98300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,983,41896.9,983000,-2.476,-2.04,-3.41,247.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-10-25_04-10-12
  done: false
  episode_len_mean: 247.95
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.479499999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3557
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.4562246325943205
          entropy_coeff: 0.009999999999999998
          kl: 0.009885699647949765
          policy_loss: -0.009434523805975915
          total_loss: 0.002102228957745764
          vf_explained_var: 0.08639112859964371
          vf_loss: 0.01522482620138261
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 984000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,984,41941.7,984000,-2.4795,-2.04,-3.41,247.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-10-25_04-10-57
  done: false
  episode_len_mean: 247.05
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4704999999999915
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3561
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.428675451874733
          entropy_coeff: 0.009999999999999998
          kl: 0.00360134679363379
          policy_loss: -0.04146094901694192
          total_loss: -0.03298348834117253
          vf_explained_var: 0.21598465740680695
          vf_loss: 0.012445755121815536
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,985,41986.1,985000,-2.4705,-2.04,-3.41,247.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-10-25_04-11-40
  done: false
  episode_len_mean: 247.03
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4702999999999915
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3565
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04421399933140124
          cur_lr: 5.000000000000001e-05
          entropy: 0.4616372283962038
          entropy_coeff: 0.009999999999999998
          kl: 0.027112825065713823
          policy_loss: -0.01619430250591702
          total_loss: -0.005534038858281242
          vf_explained_var: 0.07252839207649231
          vf_loss: 0.014077871323873599
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_trained: 9860

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,986,42029,986000,-2.4703,-2.04,-3.41,247.03




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-10-25_04-12-41
  done: false
  episode_len_mean: 246.98
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4697999999999913
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 3570
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06632099899710187
          cur_lr: 5.000000000000001e-05
          entropy: 0.5940936687919829
          entropy_coeff: 0.009999999999999998
          kl: 0.029912931286329582
          policy_loss: 0.0588832986023691
          total_loss: 0.06784300224648582
          vf_explained_var: 0.08319021016359329
          vf_loss: 0.012916784893928304
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 987000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,987,42090.3,987000,-2.4698,-2.04,-3.41,246.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-10-25_04-13-28
  done: false
  episode_len_mean: 247.19
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.471899999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3574
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09948149849565277
          cur_lr: 5.000000000000001e-05
          entropy: 0.4715982665618261
          entropy_coeff: 0.009999999999999998
          kl: 0.011805666146139343
          policy_loss: -0.0008703889118300544
          total_loss: 0.009170397288269468
          vf_explained_var: 0.21268604695796967
          vf_loss: 0.013582322167025672
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 9880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,988,42137.6,988000,-2.4719,-2.04,-3.41,247.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-10-25_04-14-16
  done: false
  episode_len_mean: 247.16
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.471599999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3578
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09948149849565277
          cur_lr: 5.000000000000001e-05
          entropy: 0.34863699823617933
          entropy_coeff: 0.009999999999999998
          kl: 0.006801777857205672
          policy_loss: 0.04881784054968092
          total_loss: 0.05959744917021857
          vf_explained_var: 0.10369300842285156
          vf_loss: 0.01358932757543193
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained: 989000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,989,42185.6,989000,-2.4716,-2.04,-3.41,247.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-10-25_04-15-06
  done: false
  episode_len_mean: 247.05
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4704999999999915
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3582
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09948149849565277
          cur_lr: 5.000000000000001e-05
          entropy: 0.31475656628608706
          entropy_coeff: 0.009999999999999998
          kl: 0.0038848400878440732
          policy_loss: -0.06276952847838402
          total_loss: -0.05164211243391037
          vf_explained_var: 0.07231742143630981
          vf_loss: 0.013888510326958365
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,990,42235.6,990000,-2.4705,-2.04,-3.41,247.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-10-25_04-15-54
  done: false
  episode_len_mean: 247.0
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.469999999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 3587
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.049740749247826385
          cur_lr: 5.000000000000001e-05
          entropy: 0.4075845056109958
          entropy_coeff: 0.009999999999999998
          kl: 0.006326529723232811
          policy_loss: -0.026898897935946783
          total_loss: -0.015080049427019225
          vf_explained_var: 0.24875634908676147
          vf_loss: 0.01558000825655957
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 99100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,991,42282.9,991000,-2.47,-2.04,-3.41,247


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-10-25_04-16-38
  done: false
  episode_len_mean: 247.35
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.473499999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3591
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.049740749247826385
          cur_lr: 5.000000000000001e-05
          entropy: 0.6067348129219479
          entropy_coeff: 0.009999999999999998
          kl: 0.017628831721810793
          policy_loss: 0.04773395674096213
          total_loss: 0.05376613438129425
          vf_explained_var: 0.0914587751030922
          vf_loss: 0.01122265243385401
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 992000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,992,42327.1,992000,-2.4735,-2.04,-3.41,247.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-10-25_04-17-26
  done: false
  episode_len_mean: 246.71
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4670999999999914
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3595
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.049740749247826385
          cur_lr: 5.000000000000001e-05
          entropy: 0.34176795151498585
          entropy_coeff: 0.009999999999999998
          kl: 0.004718097328558625
          policy_loss: 0.019658071796099345
          total_loss: 0.029277504483858744
          vf_explained_var: 0.12116833031177521
          vf_loss: 0.012802430325084262
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 993

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,993,42375.5,993000,-2.4671,-2.04,-3.41,246.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-10-25_04-18-11
  done: false
  episode_len_mean: 246.38
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4637999999999916
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3599
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024870374623913193
          cur_lr: 5.000000000000001e-05
          entropy: 0.4368361132012473
          entropy_coeff: 0.009999999999999998
          kl: 0.008701462187626923
          policy_loss: 0.023063378698296017
          total_loss: 0.03070764574739668
          vf_explained_var: 0.08080481737852097
          vf_loss: 0.011796217722197374
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 99400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,994,42420.3,994000,-2.4638,-2.04,-3.41,246.38




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-10-25_04-19-15
  done: false
  episode_len_mean: 246.03
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4602999999999913
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3603
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024870374623913193
          cur_lr: 5.000000000000001e-05
          entropy: 0.5038042237361272
          entropy_coeff: 0.009999999999999998
          kl: 0.007183168257480017
          policy_loss: -0.04733606113327874
          total_loss: -0.0380791907509168
          vf_explained_var: 0.062236759811639786
          vf_loss: 0.014116267446014616
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 9950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,995,42483.9,995000,-2.4603,-2.04,-3.41,246.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-10-25_04-20-02
  done: false
  episode_len_mean: 245.72
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.4571999999999914
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3607
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024870374623913193
          cur_lr: 5.000000000000001e-05
          entropy: 0.3472377397947841
          entropy_coeff: 0.009999999999999998
          kl: 0.007020056256361842
          policy_loss: -0.11634077280759811
          total_loss: -0.10218734964728356
          vf_explained_var: 0.07060626149177551
          vf_loss: 0.01745121060974068
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 99600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,996,42530.8,996000,-2.4572,-2.04,-3.41,245.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-10-25_04-20-47
  done: false
  episode_len_mean: 246.63
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.466299999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 3612
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024870374623913193
          cur_lr: 5.000000000000001e-05
          entropy: 0.4181390192773607
          entropy_coeff: 0.009999999999999998
          kl: 0.015594210016919243
          policy_loss: -0.01369418634308709
          total_loss: -0.002249236073758867
          vf_explained_var: 0.11413773149251938
          vf_loss: 0.015238506698773966
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 9970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,997,42576.4,997000,-2.4663,-2.08,-3.41,246.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-10-25_04-21-33
  done: false
  episode_len_mean: 246.28
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.462799999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3616
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024870374623913193
          cur_lr: 5.000000000000001e-05
          entropy: 0.382088532547156
          entropy_coeff: 0.009999999999999998
          kl: 0.0038154624649618224
          policy_loss: 0.028035535373621516
          total_loss: 0.037726317014959126
          vf_explained_var: 0.06622599810361862
          vf_loss: 0.013416776050710016
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 99800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,998,42622,998000,-2.4628,-2.08,-3.41,246.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-10-25_04-22-19
  done: false
  episode_len_mean: 245.66
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4565999999999915
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3620
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012435187311956596
          cur_lr: 5.000000000000001e-05
          entropy: 0.3434101657734977
          entropy_coeff: 0.009999999999999998
          kl: 0.0037667317945539324
          policy_loss: 0.02464650140868293
          total_loss: 0.035439174042807685
          vf_explained_var: 0.04602142795920372
          vf_loss: 0.014179935865104198
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 9990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,999,42667.9,999000,-2.4566,-2.08,-3.41,245.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-10-25_04-23-07
  done: false
  episode_len_mean: 244.7
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.446999999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3624
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006217593655978298
          cur_lr: 5.000000000000001e-05
          entropy: 0.2394650959306293
          entropy_coeff: 0.009999999999999998
          kl: 0.003875111828099244
          policy_loss: -0.010990582075383929
          total_loss: 0.000866014924314287
          vf_explained_var: 0.056103527545928955
          vf_loss: 0.014227156527340412
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1000,42716.2,1000000,-2.447,-2.08,-3.41,244.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-10-25_04-23-54
  done: false
  episode_len_mean: 243.71
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.437099999999992
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 3629
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003108796827989149
          cur_lr: 5.000000000000001e-05
          entropy: 0.3008534676498837
          entropy_coeff: 0.009999999999999998
          kl: 0.015909662236113713
          policy_loss: -0.0038008344670136768
          total_loss: 0.011174726568990283
          vf_explained_var: 0.07926613837480545
          vf_loss: 0.017934634960773917
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1001,42763.3,1001000,-2.4371,-2.08,-3.41,243.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-10-25_04-25-01
  done: false
  episode_len_mean: 241.01
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4100999999999924
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3633
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003108796827989149
          cur_lr: 5.000000000000001e-05
          entropy: 0.721421883503596
          entropy_coeff: 0.009999999999999998
          kl: 0.05567596745812272
          policy_loss: 0.01407068951262368
          total_loss: 0.016960439913802677
          vf_explained_var: 0.28395938873291016
          vf_loss: 0.009930881006746656
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_trained: 1002

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1002,42829.8,1002000,-2.4101,-1.93,-2.9,241.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-10-25_04-25-48
  done: false
  episode_len_mean: 240.53
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4052999999999924
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3637
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004663195241983722
          cur_lr: 5.000000000000001e-05
          entropy: 0.4830857826603783
          entropy_coeff: 0.009999999999999998
          kl: 0.012805222910003284
          policy_loss: -0.024453199033935866
          total_loss: -0.0162851692073875
          vf_explained_var: 0.29574665427207947
          vf_loss: 0.012939172817601098
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1003,42876.9,1003000,-2.4053,-1.93,-2.9,240.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-10-25_04-26-33
  done: false
  episode_len_mean: 240.61
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.406099999999993
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3641
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004663195241983722
          cur_lr: 5.000000000000001e-05
          entropy: 0.5946584390269385
          entropy_coeff: 0.009999999999999998
          kl: 0.038042495925994756
          policy_loss: -0.056619852201806174
          total_loss: -0.04859854338897599
          vf_explained_var: 0.3708741068840027
          vf_loss: 0.013790495419460867
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1004,42921.8,1004000,-2.4061,-1.93,-2.9,240.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-10-25_04-27-17
  done: false
  episode_len_mean: 240.69
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4068999999999923
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3645
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006994792862975584
          cur_lr: 5.000000000000001e-05
          entropy: 0.7679523474640316
          entropy_coeff: 0.009999999999999998
          kl: 0.03267911965304917
          policy_loss: 0.004948636889457703
          total_loss: 0.008906582163439857
          vf_explained_var: 0.4909473955631256
          vf_loss: 0.011408884254180723
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_trained: 100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1005,42965.9,1005000,-2.4069,-1.93,-2.9,240.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-10-25_04-27-58
  done: false
  episode_len_mean: 241.82
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.418199999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 3648
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 1.1486597584353553
          entropy_coeff: 0.009999999999999998
          kl: 0.06700765687461724
          policy_loss: -0.10844175103637907
          total_loss: -0.10831952393054962
          vf_explained_var: 0.5922566652297974
          vf_loss: 0.010905767946193615
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_trained: 100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1006,43007.2,1006000,-2.4182,-1.93,-3.31,241.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-10-25_04-28-41
  done: false
  episode_len_mean: 242.7
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.426999999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3652
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015738283941695068
          cur_lr: 5.000000000000001e-05
          entropy: 0.9859632651011149
          entropy_coeff: 0.009999999999999998
          kl: 0.027791768122000824
          policy_loss: -0.0269388508465555
          total_loss: -0.026246821342243088
          vf_explained_var: 0.630785346031189
          vf_loss: 0.01011426645434565
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trained: 10070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1007,43049.6,1007000,-2.427,-1.93,-3.31,242.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-10-25_04-29-25
  done: false
  episode_len_mean: 243.15
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4314999999999922
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3656
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023607425912542605
          cur_lr: 5.000000000000001e-05
          entropy: 1.090014377567503
          entropy_coeff: 0.009999999999999998
          kl: 0.03827488895620053
          policy_loss: 0.020931171129147212
          total_loss: 0.021060085131062403
          vf_explained_var: 0.6259310245513916
          vf_loss: 0.01012548679071996
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_trained: 1008

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1008,43093.6,1008000,-2.4315,-1.93,-3.31,243.15




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-10-25_04-30-25
  done: false
  episode_len_mean: 243.45
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.434499999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3660
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.7388367699252234
          entropy_coeff: 0.009999999999999998
          kl: 0.010314205303307039
          policy_loss: 0.020132366485065886
          total_loss: 0.023130732940302955
          vf_explained_var: 0.5017894506454468
          vf_loss: 0.010021495239602194
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1009,43153.8,1009000,-2.4345,-1.93,-3.31,243.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-10-25_04-31-12
  done: false
  episode_len_mean: 243.6
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4359999999999915
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3664
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.8395742966069115
          entropy_coeff: 0.009999999999999998
          kl: 0.012292452176093585
          policy_loss: 0.0028907294902536605
          total_loss: 0.005762047982878155
          vf_explained_var: 0.5190038681030273
          vf_loss: 0.010831773084484868
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1010,43200.8,1010000,-2.436,-1.93,-3.31,243.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-10-25_04-31-58
  done: false
  episode_len_mean: 243.9
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4389999999999916
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3668
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.6669378648201625
          entropy_coeff: 0.009999999999999998
          kl: 0.006800606278117015
          policy_loss: 0.04254450541403559
          total_loss: 0.045519177118937174
          vf_explained_var: 0.43712612986564636
          vf_loss: 0.009403231600299478
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1011,43247,1011000,-2.439,-1.93,-3.31,243.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-10-25_04-32-45
  done: false
  episode_len_mean: 243.86
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4385999999999917
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3672
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.6794271879725986
          entropy_coeff: 0.009999999999999998
          kl: 0.013233150089048648
          policy_loss: 0.003831651227341758
          total_loss: 0.009040834092431598
          vf_explained_var: 0.37660932540893555
          vf_loss: 0.011534852617316776
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1012,43293.8,1012000,-2.4386,-1.93,-3.31,243.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-10-25_04-33-33
  done: false
  episode_len_mean: 244.24
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4423999999999917
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3676
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.4141449428266949
          entropy_coeff: 0.009999999999999998
          kl: 0.006988716317935258
          policy_loss: 0.03456523790955544
          total_loss: 0.03991709194249577
          vf_explained_var: 0.3165797293186188
          vf_loss: 0.009245826250925246
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1013,43341.5,1013000,-2.4424,-1.93,-3.31,244.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-10-25_04-34-20
  done: false
  episode_len_mean: 244.09
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.440899999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 5
  episodes_total: 3681
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.3269360111819373
          entropy_coeff: 0.009999999999999998
          kl: 0.004772564225488177
          policy_loss: -0.023374337454636893
          total_loss: -0.009622335351175732
          vf_explained_var: 0.13582409918308258
          vf_loss: 0.016852358997695976
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1014,43388.2,1014000,-2.4409,-1.93,-3.31,244.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-10-25_04-35-07
  done: false
  episode_len_mean: 244.43
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4442999999999917
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3685
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017705569434406952
          cur_lr: 5.000000000000001e-05
          entropy: 0.6073416431744894
          entropy_coeff: 0.009999999999999998
          kl: 0.010584996489427172
          policy_loss: 0.01786348529987865
          total_loss: 0.02343412066499392
          vf_explained_var: 0.33405160903930664
          vf_loss: 0.011456636546386612
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1015,43435.4,1015000,-2.4443,-1.93,-3.31,244.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-10-25_04-35-53
  done: false
  episode_len_mean: 244.45
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.444499999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3689
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017705569434406952
          cur_lr: 5.000000000000001e-05
          entropy: 0.37169929080539277
          entropy_coeff: 0.009999999999999998
          kl: 0.004584930999893386
          policy_loss: 0.025373739252487818
          total_loss: 0.03360875820120176
          vf_explained_var: 0.20088934898376465
          vf_loss: 0.011870832648128271
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1016,43482,1016000,-2.4445,-1.93,-3.31,244.45




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-10-25_04-37-00
  done: false
  episode_len_mean: 244.34
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.443399999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3693
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.5656511164373822
          entropy_coeff: 0.009999999999999998
          kl: 0.013892963093735281
          policy_loss: 0.012300759553909302
          total_loss: 0.01925672565897306
          vf_explained_var: 0.24167031049728394
          vf_loss: 0.012489484540290302
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1017,43548.5,1017000,-2.4434,-1.93,-3.31,244.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-10-25_04-37-48
  done: false
  episode_len_mean: 244.31
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4430999999999923
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3697
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.39674422277344595
          entropy_coeff: 0.009999999999999998
          kl: 0.005793557210806854
          policy_loss: -0.05875776227977541
          total_loss: -0.04982715174555778
          vf_explained_var: 0.15160349011421204
          vf_loss: 0.012846764477176798
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1018,43596,1018000,-2.4431,-1.93,-3.31,244.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-10-25_04-38-35
  done: false
  episode_len_mean: 244.17
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4416999999999915
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 5
  episodes_total: 3702
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.538403375281228
          entropy_coeff: 0.009999999999999998
          kl: 0.012692840682726036
          policy_loss: -0.027907841238710616
          total_loss: -0.017186197141806286
          vf_explained_var: 0.2399669587612152
          vf_loss: 0.01599331164939536
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1019,43643.8,1019000,-2.4417,-1.93,-3.31,244.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-10-25_04-39-20
  done: false
  episode_len_mean: 244.75
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4474999999999922
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3706
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.6689644628100925
          entropy_coeff: 0.009999999999999998
          kl: 0.017737193437507574
          policy_loss: -0.008556343532270855
          total_loss: -0.003004218555159039
          vf_explained_var: 0.42518287897109985
          vf_loss: 0.012084748332078258
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1020,43688.7,1020000,-2.4475,-1.93,-3.31,244.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-10-25_04-40-07
  done: false
  episode_len_mean: 244.86
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4485999999999923
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3710
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.7796413666672177
          entropy_coeff: 0.009999999999999998
          kl: 0.01836991135601316
          policy_loss: -0.016456624617179234
          total_loss: -0.012195734389954143
          vf_explained_var: 0.4056893587112427
          vf_loss: 0.011894680859727991
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1021,43735,1021000,-2.4486,-1.93,-3.31,244.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-10-25_04-40-49
  done: false
  episode_len_mean: 245.41
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4540999999999915
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 3713
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008852784717203476
          cur_lr: 5.000000000000001e-05
          entropy: 0.7793452660242717
          entropy_coeff: 0.009999999999999998
          kl: 0.02069720529509122
          policy_loss: -0.0791643703977267
          total_loss: -0.07493777043289608
          vf_explained_var: 0.44008323550224304
          vf_loss: 0.011836826552947361
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1022,43777.1,1022000,-2.4541,-1.93,-3.31,245.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-10-25_04-41-33
  done: false
  episode_len_mean: 246.04
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.460399999999991
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3717
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01327917707580521
          cur_lr: 5.000000000000001e-05
          entropy: 0.8352448960145314
          entropy_coeff: 0.009999999999999998
          kl: 0.015423579279743648
          policy_loss: 0.016048136187924278
          total_loss: 0.019733753303686777
          vf_explained_var: 0.4486677348613739
          vf_loss: 0.011833251412543986
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained: 102

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1023,43821.3,1023000,-2.4604,-1.93,-3.31,246.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-10-25_04-42-36
  done: false
  episode_len_mean: 247.0
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4699999999999913
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3721
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01327917707580521
          cur_lr: 5.000000000000001e-05
          entropy: 0.6697649310032526
          entropy_coeff: 0.009999999999999998
          kl: 0.015133816871107935
          policy_loss: -0.036938540264964105
          total_loss: -0.03166636481053299
          vf_explained_var: 0.4024607241153717
          vf_loss: 0.011768862698227167
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1024,43884.6,1024000,-2.47,-1.93,-3.31,247


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-10-25_04-43-20
  done: false
  episode_len_mean: 248.26
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.4825999999999913
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3725
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01327917707580521
          cur_lr: 5.000000000000001e-05
          entropy: 0.8613032546308306
          entropy_coeff: 0.009999999999999998
          kl: 0.025112126367048758
          policy_loss: 0.005848682837353812
          total_loss: 0.008782370057370927
          vf_explained_var: 0.4887981116771698
          vf_loss: 0.011213250654853053
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1025,43928.5,1025000,-2.4826,-1.93,-3.31,248.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-10-25_04-44-03
  done: false
  episode_len_mean: 249.72
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.497199999999991
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3729
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.9976332247257232
          entropy_coeff: 0.009999999999999998
          kl: 0.025854148329882934
          policy_loss: 0.032926597363419005
          total_loss: 0.03363523781299591
          vf_explained_var: 0.4804776608943939
          vf_loss: 0.010169987582291167
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trained: 102

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1026,43970.8,1026000,-2.4972,-1.93,-3.31,249.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-10-25_04-44-49
  done: false
  episode_len_mean: 250.73
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.50729999999999
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3733
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029878148420561728
          cur_lr: 5.000000000000001e-05
          entropy: 0.8444824311468336
          entropy_coeff: 0.009999999999999998
          kl: 0.016946302828464294
          policy_loss: 0.036294385625256435
          total_loss: 0.04033704507682059
          vf_explained_var: 0.2672347128391266
          vf_loss: 0.011981155899249845
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trained: 1027

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1027,44017,1027000,-2.5073,-2.22,-3.31,250.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-10-25_04-45-31
  done: false
  episode_len_mean: 251.86
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5185999999999904
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3737
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029878148420561728
          cur_lr: 5.000000000000001e-05
          entropy: 1.043421134683821
          entropy_coeff: 0.009999999999999998
          kl: 0.037556154839308124
          policy_loss: -0.009173189434740278
          total_loss: -0.00807840484711859
          vf_explained_var: 0.24562367796897888
          vf_loss: 0.010406888824784093
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1028,44059.3,1028000,-2.5186,-2.22,-3.31,251.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-10-25_04-46-05
  done: false
  episode_len_mean: 253.63
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.53629999999999
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 2
  episodes_total: 3739
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04481722263084259
          cur_lr: 5.000000000000001e-05
          entropy: 1.2595798916286893
          entropy_coeff: 0.009999999999999998
          kl: 0.033762927407704285
          policy_loss: -0.12006466156906552
          total_loss: -0.12172502064042622
          vf_explained_var: -0.21069635450839996
          vf_loss: 0.009422281130941377
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1029,44092.9,1029000,-2.5363,-2.22,-3.31,253.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-10-25_04-46-50
  done: false
  episode_len_mean: 254.56
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5455999999999896
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 5
  episodes_total: 3744
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.7940005918343862
          entropy_coeff: 0.009999999999999998
          kl: 0.00711860478732144
          policy_loss: -0.019033788806862302
          total_loss: -0.009953406784269545
          vf_explained_var: 0.13049425184726715
          vf_loss: 0.01654183061586486
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1030,44138.1,1030000,-2.5456,-2.22,-3.87,254.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-10-25_04-47-32
  done: false
  episode_len_mean: 253.69
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5368999999999895
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3747
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.9664448565906949
          entropy_coeff: 0.009999999999999998
          kl: 0.013078817886900396
          policy_loss: 0.014394610913263427
          total_loss: 0.015095665264460776
          vf_explained_var: 0.1429097205400467
          vf_loss: 0.009486267612212234
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_trained: 103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1031,44180.2,1031000,-2.5369,-2.22,-3.87,253.69




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-10-25_04-48-28
  done: false
  episode_len_mean: 254.28
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5427999999999895
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3751
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 1.2347970565160116
          entropy_coeff: 0.009999999999999998
          kl: 0.01686941343347045
          policy_loss: -0.030189805808994503
          total_loss: -0.027202853974368838
          vf_explained_var: 0.1074000895023346
          vf_loss: 0.014200861203587718
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1032,44235.9,1032000,-2.5428,-2.22,-3.87,254.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-10-25_04-49-12
  done: false
  episode_len_mean: 253.56
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.53559999999999
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3755
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.8356481035550435
          entropy_coeff: 0.009999999999999998
          kl: 0.009400755682542833
          policy_loss: -0.0050763311071528325
          total_loss: 0.0009849716391828326
          vf_explained_var: 0.14099103212356567
          vf_loss: 0.013785811855147283
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1033,44280.2,1033000,-2.5356,-2.22,-3.87,253.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-10-25_04-49-51
  done: false
  episode_len_mean: 253.93
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5392999999999897
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3758
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.9551609403557247
          entropy_coeff: 0.009999999999999998
          kl: 0.017480844575136226
          policy_loss: -0.04975045373042424
          total_loss: -0.04799521399868859
          vf_explained_var: -0.09747962653636932
          vf_loss: 0.010131682940603544
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1034,44319.3,1034000,-2.5393,-2.22,-3.87,253.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-10-25_04-50-34
  done: false
  episode_len_mean: 254.75
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.547499999999989
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3762
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.9298005567656623
          entropy_coeff: 0.009999999999999998
          kl: 0.009191968533961632
          policy_loss: 0.009963238280680445
          total_loss: 0.015822602560122807
          vf_explained_var: 0.05551886558532715
          vf_loss: 0.01453943331208494
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_trained: 1035

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1035,44361.9,1035000,-2.5475,-2.22,-3.87,254.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-10-25_04-51-05
  done: false
  episode_len_mean: 258.1
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5809999999999884
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3765
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 1.335792887210846
          entropy_coeff: 0.009999999999999998
          kl: 0.03456636980149692
          policy_loss: 0.023222245275974274
          total_loss: 0.022406082186434004
          vf_explained_var: -0.023898707702755928
          vf_loss: 0.01021800966312488
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trained: 1036

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1036,44393.2,1036000,-2.581,-2.22,-3.87,258.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-10-25_04-51-43
  done: false
  episode_len_mean: 259.97
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5996999999999884
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3768
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10083875091939583
          cur_lr: 5.000000000000001e-05
          entropy: 1.4336367699835035
          entropy_coeff: 0.009999999999999998
          kl: 0.029818953928312117
          policy_loss: 0.0422256872885757
          total_loss: 0.04162233819564184
          vf_explained_var: -0.2747254967689514
          vf_loss: 0.010726112737191013
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trained: 1037

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1037,44431,1037000,-2.5997,-2.22,-3.87,259.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-10-25_04-52-21
  done: false
  episode_len_mean: 261.36
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6135999999999875
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3771
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.4338251153628032
          entropy_coeff: 0.009999999999999998
          kl: 0.011904421362663422
          policy_loss: 0.009833550370401806
          total_loss: 0.008878372775183784
          vf_explained_var: -0.14428415894508362
          vf_loss: 0.011582432958918313
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1038,44468.9,1038000,-2.6136,-2.22,-3.87,261.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-10-25_04-53-03
  done: false
  episode_len_mean: 262.86
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6285999999999876
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3775
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3012225932545132
          entropy_coeff: 0.009999999999999998
          kl: 0.012516825496043932
          policy_loss: -0.002564726894100507
          total_loss: 6.340684162245857e-05
          vf_explained_var: 0.2383566051721573
          vf_loss: 0.013747089118179347
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1039,44510.5,1039000,-2.6286,-2.22,-3.87,262.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-10-25_04-53-45
  done: false
  episode_len_mean: 264.09
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6408999999999874
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3779
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1968383312225341
          entropy_coeff: 0.009999999999999998
          kl: 0.009989975682441829
          policy_loss: 0.07012530242403349
          total_loss: 0.07609863364034229
          vf_explained_var: 0.059387724846601486
          vf_loss: 0.01643065491484271
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_trained: 104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1040,44553.2,1040000,-2.6409,-2.22,-3.87,264.09




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-10-25_04-54-48
  done: false
  episode_len_mean: 265.05
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6504999999999876
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3782
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2249925997522142
          entropy_coeff: 0.009999999999999998
          kl: 0.017753036872901577
          policy_loss: -0.08336003836658266
          total_loss: -0.07834381345245574
          vf_explained_var: 0.18358014523983002
          vf_loss: 0.014580856584426429
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1041,44616.1,1041000,-2.6505,-2.22,-3.87,265.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-10-25_04-55-30
  done: false
  episode_len_mean: 266.12
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.661199999999987
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3786
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1414164311356014
          entropy_coeff: 0.009999999999999998
          kl: 0.009434606722914612
          policy_loss: -0.00923244340552224
          total_loss: -0.0048152200877666475
          vf_explained_var: 0.11237721145153046
          vf_loss: 0.014404329823123084
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1042,44658.1,1042000,-2.6612,-2.22,-3.87,266.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-10-25_04-56-13
  done: false
  episode_len_mean: 266.93
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.669299999999987
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3790
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1945934759245977
          entropy_coeff: 0.009999999999999998
          kl: 0.007803257000794714
          policy_loss: -0.004619087444411384
          total_loss: -0.0005820037590132819
          vf_explained_var: 0.14904269576072693
          vf_loss: 0.014802713599056005
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1043,44701,1043000,-2.6693,-2.22,-3.87,266.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-10-25_04-56-57
  done: false
  episode_len_mean: 267.8
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6779999999999866
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3794
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2323670678668552
          entropy_coeff: 0.009999999999999998
          kl: 0.006855413064708671
          policy_loss: 0.023101661933792962
          total_loss: 0.02583469715383318
          vf_explained_var: 0.17737914621829987
          vf_loss: 0.014019769181807836
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_trained: 104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1044,44744.7,1044000,-2.678,-2.22,-3.87,267.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-10-25_04-57-40
  done: false
  episode_len_mean: 269.06
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6905999999999866
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3798
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3884530915154352
          entropy_coeff: 0.009999999999999998
          kl: 0.009093116002553872
          policy_loss: 0.015881629288196565
          total_loss: 0.018168628050221337
          vf_explained_var: 0.19337764382362366
          vf_loss: 0.014796119783487586
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1045,44787.6,1045000,-2.6906,-2.22,-3.87,269.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-10-25_04-58-23
  done: false
  episode_len_mean: 270.04
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.700399999999986
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3802
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.293818649980757
          entropy_coeff: 0.009999999999999998
          kl: 0.006010186254593779
          policy_loss: 0.0212872257663144
          total_loss: 0.022361347989903555
          vf_explained_var: 0.11956824362277985
          vf_loss: 0.013103218728469477
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_trained: 10460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1046,44830.7,1046000,-2.7004,-2.22,-3.87,270.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-10-25_04-59-05
  done: false
  episode_len_mean: 270.36
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7035999999999865
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3805
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2623293161392213
          entropy_coeff: 0.009999999999999998
          kl: 0.005541470260887296
          policy_loss: 0.021896910087929832
          total_loss: 0.020673344284296034
          vf_explained_var: 0.1696057915687561
          vf_loss: 0.010561533127393987
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1047,44873,1047000,-2.7036,-2.22,-3.87,270.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-10-25_04-59-48
  done: false
  episode_len_mean: 271.49
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7148999999999863
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3809
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2475377745098537
          entropy_coeff: 0.009999999999999998
          kl: 0.00857486327986672
          policy_loss: 0.02326455687483152
          total_loss: 0.02565445254246394
          vf_explained_var: 0.08599837124347687
          vf_loss: 0.013568254487795962
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps_trained: 1048

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1048,44915.2,1048000,-2.7149,-2.22,-3.87,271.49




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-10-25_05-00-49
  done: false
  episode_len_mean: 271.36
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.713599999999986
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3813
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3442203097873264
          entropy_coeff: 0.009999999999999998
          kl: 0.005267195538459172
          policy_loss: 0.018464049365785388
          total_loss: 0.021700495895412234
          vf_explained_var: 0.07115587592124939
          vf_loss: 0.015881943847570155
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1049,44977,1049000,-2.7136,-2.22,-3.87,271.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-10-25_05-01-29
  done: false
  episode_len_mean: 271.58
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.715799999999986
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3817
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.278705030017429
          entropy_coeff: 0.009999999999999998
          kl: 0.009301796389971488
          policy_loss: 0.0060596475998560585
          total_loss: 0.009747928463750416
          vf_explained_var: 0.10256300866603851
          vf_loss: 0.015068359062489536
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1050,45016.7,1050000,-2.7158,-2.22,-3.87,271.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-10-25_05-02-15
  done: false
  episode_len_mean: 271.55
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.7154999999999863
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3821
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2399975882636176
          entropy_coeff: 0.009999999999999998
          kl: 0.004903456984716293
          policy_loss: -0.0004561404801077313
          total_loss: 0.0023183104478650624
          vf_explained_var: 0.11200044304132462
          vf_loss: 0.014432738048748837
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1051,45062.1,1051000,-2.7155,-2.31,-3.87,271.55


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-10-25_05-03-00
  done: false
  episode_len_mean: 271.9
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.7189999999999857
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 3
  episodes_total: 3824
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.1957296954260932
          entropy_coeff: 0.009999999999999998
          kl: 0.010177825937451247
          policy_loss: -0.11061699266235034
          total_loss: -0.10701798515187369
          vf_explained_var: 0.1158541664481163
          vf_loss: 0.014786562944451968
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1052,45107.4,1052000,-2.719,-2.31,-3.87,271.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-10-25_05-03-44
  done: false
  episode_len_mean: 271.76
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.7175999999999862
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3828
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.1178507579697503
          entropy_coeff: 0.009999999999999998
          kl: 0.007910541727776553
          policy_loss: -0.04004296954307291
          total_loss: -0.03735842319826285
          vf_explained_var: 0.14920133352279663
          vf_loss: 0.013264784485929541
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1053,45151.7,1053000,-2.7176,-2.31,-3.87,271.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-10-25_05-04-22
  done: false
  episode_len_mean: 272.51
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.7250999999999856
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3832
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.2740315192275578
          entropy_coeff: 0.009999999999999998
          kl: 0.01562662209858746
          policy_loss: 0.008278499295314153
          total_loss: 0.010705505890978708
          vf_explained_var: 0.21915170550346375
          vf_loss: 0.013985492113149828
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1054,45189.1,1054000,-2.7251,-2.31,-3.87,272.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-10-25_05-05-08
  done: false
  episode_len_mean: 271.81
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.718099999999985
  episode_reward_min: -3.8699999999999615
  episodes_this_iter: 4
  episodes_total: 3836
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 0.9686412625842624
          entropy_coeff: 0.009999999999999998
          kl: 0.0059173301645400285
          policy_loss: 0.04302396045790778
          total_loss: 0.0463433638215065
          vf_explained_var: 0.11610978096723557
          vf_loss: 0.012558292080130842
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_trained: 10550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1055,45235.3,1055000,-2.7181,-2.34,-3.87,271.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-10-25_05-06-13
  done: false
  episode_len_mean: 268.63
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6862999999999864
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3840
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.007160441743003
          entropy_coeff: 0.009999999999999998
          kl: 0.007914034935088202
          policy_loss: 0.013309572637081147
          total_loss: 0.01768601213892301
          vf_explained_var: 0.11016732454299927
          vf_loss: 0.013849511163102256
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1056,45300.3,1056000,-2.6863,-2.13,-3.74,268.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-10-25_05-06-57
  done: false
  episode_len_mean: 268.86
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.688599999999986
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3844
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 0.9374177541997698
          entropy_coeff: 0.009999999999999998
          kl: 0.006552240240673907
          policy_loss: 0.014475099990765255
          total_loss: 0.020365134129921594
          vf_explained_var: 0.08516839146614075
          vf_loss: 0.014768675031761328
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1057,45344.8,1057000,-2.6886,-2.13,-3.74,268.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-10-25_05-07-41
  done: false
  episode_len_mean: 267.96
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.679599999999986
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3848
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 0.9505065302054088
          entropy_coeff: 0.009999999999999998
          kl: 0.005327623045176071
          policy_loss: 0.01890161294076178
          total_loss: 0.024595242573155297
          vf_explained_var: 0.07995893061161041
          vf_loss: 0.014795773207313484
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1058,45388.7,1058000,-2.6796,-2.13,-3.74,267.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-10-25_05-08-24
  done: false
  episode_len_mean: 266.61
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6660999999999877
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3852
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.0426734122965071
          entropy_coeff: 0.009999999999999998
          kl: 0.009583974010419006
          policy_loss: 0.011535697844293383
          total_loss: 0.016340170055627823
          vf_explained_var: 0.12279517203569412
          vf_loss: 0.014506382919434044
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1059,45430.9,1059000,-2.6661,-2.13,-3.74,266.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-10-25_05-09-08
  done: false
  episode_len_mean: 266.03
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.660299999999987
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3856
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.0431892547342512
          entropy_coeff: 0.009999999999999998
          kl: 0.015312348824933666
          policy_loss: 0.008080144226551057
          total_loss: 0.013732982592450248
          vf_explained_var: 0.10756145417690277
          vf_loss: 0.01492667357540793
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1060,45475.6,1060000,-2.6603,-2.13,-3.74,266.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-10-25_05-09-52
  done: false
  episode_len_mean: 265.34
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.653399999999987
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 3860
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.0123878445890215
          entropy_coeff: 0.009999999999999998
          kl: 0.006680216528019716
          policy_loss: 0.025268331749571694
          total_loss: 0.028940119180414413
          vf_explained_var: 0.09898500144481659
          vf_loss: 0.01329044945951965
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1061,45519.6,1061000,-2.6534,-2.13,-3.74,265.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-10-25_05-10-34
  done: false
  episode_len_mean: 263.35
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6334999999999877
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 3864
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.1301857146951888
          entropy_coeff: 0.009999999999999998
          kl: 0.01648964801041575
          policy_loss: -0.0018850906855530209
          total_loss: 0.0025100096232361264
          vf_explained_var: 0.12533888220787048
          vf_loss: 0.014449863653216098
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1062,45561.1,1062000,-2.6335,-2.13,-3.5,263.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-10-25_05-11-17
  done: false
  episode_len_mean: 260.75
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.607499999999988
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 3868
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.1019148912694718
          entropy_coeff: 0.009999999999999998
          kl: 0.008155581500071838
          policy_loss: 0.020151171584924063
          total_loss: 0.02310856738024288
          vf_explained_var: 0.14429035782814026
          vf_loss: 0.013359745498746634
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1063,45604.7,1063000,-2.6075,-2.13,-3.35,260.75




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-10-25_05-12-13
  done: false
  episode_len_mean: 260.48
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6047999999999876
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 3871
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.5707080218527052
          entropy_coeff: 0.009999999999999998
          kl: 0.018824562404000142
          policy_loss: 0.03533276261554824
          total_loss: 0.03015461423330837
          vf_explained_var: 0.3893270790576935
          vf_loss: 0.00910524629547985
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained: 106400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1064,45659.9,1064000,-2.6048,-2.13,-3.33,260.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-10-25_05-12-52
  done: false
  episode_len_mean: 261.01
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.610099999999988
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 3874
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.249356034066942
          entropy_coeff: 0.009999999999999998
          kl: 0.014972850410122855
          policy_loss: -0.09322216221027904
          total_loss: -0.09182776800460285
          vf_explained_var: 0.42926695942878723
          vf_loss: 0.01275557057104177
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained: 10650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1065,45699.3,1065000,-2.6101,-2.13,-3.33,261.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-10-25_05-13-32
  done: false
  episode_len_mean: 262.7
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.626999999999988
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 3878
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.4294500841034783
          entropy_coeff: 0.009999999999999998
          kl: 0.017991128336972886
          policy_loss: 0.007124960008594725
          total_loss: 0.005180504669745763
          vf_explained_var: 0.5562326908111572
          vf_loss: 0.010989389247778389
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained: 10660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1066,45738.7,1066000,-2.627,-2.13,-3.57,262.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-10-25_05-14-10
  done: false
  episode_len_mean: 263.53
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.635299999999987
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 3881
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.5852423548698424
          entropy_coeff: 0.009999999999999998
          kl: 0.021788149067293715
          policy_loss: 0.016794914172755346
          total_loss: 0.009885068154997297
          vf_explained_var: 0.724649965763092
          vf_loss: 0.007294757815543562
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained: 10670

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1067,45776.7,1067000,-2.6353,-2.13,-3.57,263.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-10-25_05-14-44
  done: false
  episode_len_mean: 265.75
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.657499999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 3884
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11344359478432027
          cur_lr: 5.000000000000001e-05
          entropy: 1.8677875651253595
          entropy_coeff: 0.009999999999999998
          kl: 0.02151619431655536
          policy_loss: 0.05292017194959853
          total_loss: 0.046708210309346514
          vf_explained_var: 0.5492900609970093
          vf_loss: 0.010025038519072244
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_trained: 106800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1068,45811.5,1068000,-2.6575,-2.13,-4.09,265.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-10-25_05-15-24
  done: false
  episode_len_mean: 267.02
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.670199999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 3887
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.4457510656780668
          entropy_coeff: 0.009999999999999998
          kl: 0.013367155740843767
          policy_loss: -0.06467660152249866
          total_loss: -0.066026221960783
          vf_explained_var: 0.6120002269744873
          vf_loss: 0.010833263655917512
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 1069000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1069,45850.8,1069000,-2.6702,-2.13,-4.09,267.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-10-25_05-16-06
  done: false
  episode_len_mean: 267.66
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.676599999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3891
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.2026496595806546
          entropy_coeff: 0.009999999999999998
          kl: 0.007958644205806248
          policy_loss: -0.00017909150984552171
          total_loss: -0.00014918293390009138
          vf_explained_var: 0.6355109214782715
          vf_loss: 0.01070212313077516
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1070,45892.7,1070000,-2.6766,-2.13,-4.09,267.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-10-25_05-16-46
  done: false
  episode_len_mean: 268.39
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6838999999999866
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 3894
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221282985475328
          entropy_coeff: 0.009999999999999998
          kl: 0.0077623194086758615
          policy_loss: -0.10083417503370179
          total_loss: -0.10101681161257955
          vf_explained_var: 0.616346538066864
          vf_loss: 0.010717769536293215
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 1071

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1071,45933.2,1071000,-2.6839,-2.13,-4.09,268.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-10-25_05-17-31
  done: false
  episode_len_mean: 268.56
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.685599999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3898
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.1430436703893874
          entropy_coeff: 0.009999999999999998
          kl: 0.008328340173564975
          policy_loss: -0.0036545644203821817
          total_loss: -0.002753567116128074
          vf_explained_var: 0.5047898292541504
          vf_loss: 0.010914237476471397
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1072,45978.3,1072000,-2.6856,-2.13,-4.09,268.56




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-10-25_05-18-37
  done: false
  episode_len_mean: 267.31
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.673099999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 5
  episodes_total: 3903
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 0.9566083020634122
          entropy_coeff: 0.009999999999999998
          kl: 0.007000395091588037
          policy_loss: -0.0380755126890209
          total_loss: -0.032300700578424664
          vf_explained_var: 0.3640766441822052
          vf_loss: 0.014149671575675407
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_trained: 107300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1073,46044.2,1073000,-2.6731,-2.1,-4.09,267.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-10-25_05-19-22
  done: false
  episode_len_mean: 265.98
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6597999999999873
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3907
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 0.8842683255672454
          entropy_coeff: 0.009999999999999998
          kl: 0.007989418738194034
          policy_loss: 0.02232445925474167
          total_loss: 0.025945809359351795
          vf_explained_var: 0.26872748136520386
          vf_loss: 0.011104512680321931
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_trained: 10740

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1074,46088.8,1074000,-2.6598,-2.1,-4.09,265.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-10-25_05-20-08
  done: false
  episode_len_mean: 265.3
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.652999999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3911
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 0.7374880830446879
          entropy_coeff: 0.009999999999999998
          kl: 0.0042969158518681005
          policy_loss: 0.03003836472829183
          total_loss: 0.03614546764228079
          vf_explained_var: 0.15117400884628296
          vf_loss: 0.012750797304842207
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_trained: 1075000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1075,46135.4,1075000,-2.653,-2.1,-4.09,265.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-10-25_05-20-58
  done: false
  episode_len_mean: 264.36
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.643599999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3915
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08508269608824025
          cur_lr: 5.000000000000001e-05
          entropy: 0.7347702980041504
          entropy_coeff: 0.009999999999999998
          kl: 0.004180314594688738
          policy_loss: 0.0261340727408727
          total_loss: 0.03172739040520456
          vf_explained_var: 0.118373341858387
          vf_loss: 0.012585347777025567
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained: 1076000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1076,46184.5,1076000,-2.6436,-2.1,-4.09,264.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-10-25_05-21-45
  done: false
  episode_len_mean: 263.25
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6324999999999874
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3919
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.042541348044120124
          cur_lr: 5.000000000000001e-05
          entropy: 0.7064759684933557
          entropy_coeff: 0.009999999999999998
          kl: 0.003935320408203009
          policy_loss: -0.02383338006006347
          total_loss: -0.01759594902396202
          vf_explained_var: 0.1138390526175499
          vf_loss: 0.013134779408574104
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1077,46232.1,1077000,-2.6325,-2.1,-4.09,263.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-10-25_05-22-31
  done: false
  episode_len_mean: 262.07
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.620699999999988
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 5
  episodes_total: 3924
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8574092500739627
          entropy_coeff: 0.009999999999999998
          kl: 0.010925019307663635
          policy_loss: -0.02438917714688513
          total_loss: -0.0164668803413709
          vf_explained_var: 0.1891488879919052
          vf_loss: 0.016264004829443162
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained: 10780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1078,46277.4,1078000,-2.6207,-2.1,-4.09,262.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-10-25_05-23-18
  done: false
  episode_len_mean: 261.46
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6145999999999883
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3928
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8399212870332929
          entropy_coeff: 0.009999999999999998
          kl: 0.008471089727981173
          policy_loss: 0.012287822489937147
          total_loss: 0.016705619709359274
          vf_explained_var: 0.22161532938480377
          vf_loss: 0.012636825297441747
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1079,46324.8,1079000,-2.6146,-2.1,-4.09,261.46




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-10-25_05-24-23
  done: false
  episode_len_mean: 259.49
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.594899999999989
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3932
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8449520899189843
          entropy_coeff: 0.009999999999999998
          kl: 0.00770754309502636
          policy_loss: 0.026441166798273723
          total_loss: 0.030861695773071712
          vf_explained_var: 0.22940044105052948
          vf_loss: 0.012706103165530495
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_trained: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1080,46390.1,1080000,-2.5949,-2.09,-4.09,259.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-10-25_05-25-11
  done: false
  episode_len_mean: 259.04
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5903999999999887
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3936
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8011375566323599
          entropy_coeff: 0.009999999999999998
          kl: 0.005168354079092552
          policy_loss: -0.046074793487787244
          total_loss: -0.04099191547267967
          vf_explained_var: 0.1964576095342636
          vf_loss: 0.01298432287035717
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1081,46437.4,1081000,-2.5904,-2.09,-4.09,259.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-10-25_05-25-59
  done: false
  episode_len_mean: 258.79
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5878999999999888
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 5
  episodes_total: 3941
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8363937159379323
          entropy_coeff: 0.009999999999999998
          kl: 0.00885782305348515
          policy_loss: -0.033261714668737515
          total_loss: -0.025198668820990457
          vf_explained_var: 0.2156822681427002
          vf_loss: 0.016238569385475583
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1082,46485.5,1082000,-2.5879,-2.09,-4.09,258.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-10-25_05-26-43
  done: false
  episode_len_mean: 258.45
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5844999999999887
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3945
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.9681777126259274
          entropy_coeff: 0.009999999999999998
          kl: 0.006658992328060186
          policy_loss: -0.008607367426156998
          total_loss: -0.0056809980836179524
          vf_explained_var: 0.2789619565010071
          vf_loss: 0.012466507291214334
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1083,46529.9,1083000,-2.5845,-2.09,-4.09,258.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-10-25_05-27-29
  done: false
  episode_len_mean: 257.98
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.579799999999989
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3949
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.7714541143841214
          entropy_coeff: 0.009999999999999998
          kl: 0.008175975061990906
          policy_loss: 0.04200764613019096
          total_loss: 0.04620474477608998
          vf_explained_var: 0.15068043768405914
          vf_loss: 0.011737730436854893
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_trained: 1084

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1084,46575.8,1084000,-2.5798,-2.09,-4.09,257.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-10-25_05-28-16
  done: false
  episode_len_mean: 257.73
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5772999999999895
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3953
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.8761364923583137
          entropy_coeff: 0.009999999999999998
          kl: 0.00805929735201908
          policy_loss: 0.0212068024608824
          total_loss: 0.02582428248392211
          vf_explained_var: 0.16611932218074799
          vf_loss: 0.013207417229811351
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_trained: 10850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1085,46622.5,1085000,-2.5773,-2.09,-4.09,257.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-10-25_05-29-01
  done: false
  episode_len_mean: 257.51
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5750999999999893
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3957
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.940468121237225
          entropy_coeff: 0.009999999999999998
          kl: 0.015387544559350552
          policy_loss: 0.030383923567003673
          total_loss: 0.03351110691825549
          vf_explained_var: 0.1865682750940323
          vf_loss: 0.0122045599648522
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained: 108600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1086,46667.5,1086000,-2.5751,-2.09,-4.09,257.51




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-10-25_05-30-06
  done: false
  episode_len_mean: 257.07
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5706999999999893
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3961
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 0.9919098907046848
          entropy_coeff: 0.009999999999999998
          kl: 0.008312117856366383
          policy_loss: 0.04315755930211809
          total_loss: 0.04463253277871344
          vf_explained_var: 0.22715482115745544
          vf_loss: 0.011217269922296207
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_trained: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1087,46732.4,1087000,-2.5707,-2.09,-4.09,257.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-10-25_05-30-51
  done: false
  episode_len_mean: 257.06
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.570599999999989
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3965
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021270674022060062
          cur_lr: 5.000000000000001e-05
          entropy: 1.156579981909858
          entropy_coeff: 0.009999999999999998
          kl: 0.02548129955661393
          policy_loss: -0.0055262335472636755
          total_loss: -0.003193632471892569
          vf_explained_var: 0.23987528681755066
          vf_loss: 0.013356395903974772
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1088,46777.7,1088000,-2.5706,-2.09,-4.09,257.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-10-25_05-31-38
  done: false
  episode_len_mean: 256.19
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.561899999999989
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3969
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.0214479015933142
          entropy_coeff: 0.009999999999999998
          kl: 0.010801246015849407
          policy_loss: 0.011686097582181294
          total_loss: 0.014887840383582644
          vf_explained_var: 0.304284930229187
          vf_loss: 0.013071598888685306
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trained: 108900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1089,46824.2,1089000,-2.5619,-2.09,-4.09,256.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-10-25_05-32-22
  done: false
  episode_len_mean: 254.86
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5485999999999893
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3973
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.1414251393742032
          entropy_coeff: 0.009999999999999998
          kl: 0.014682179154346326
          policy_loss: -0.002518245619204309
          total_loss: -0.001091238690747155
          vf_explained_var: 0.3935197591781616
          vf_loss: 0.01237280485737655
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1090,46868.8,1090000,-2.5486,-2.09,-4.09,254.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-10-25_05-33-03
  done: false
  episode_len_mean: 254.22
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5421999999999896
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 3976
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.3125284075737
          entropy_coeff: 0.009999999999999998
          kl: 0.01836356804843907
          policy_loss: -0.07836842528647847
          total_loss: -0.07951473171512285
          vf_explained_var: 0.4886322021484375
          vf_loss: 0.011393071938720014
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained: 1091000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1091,46909.7,1091000,-2.5422,-2.09,-4.09,254.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-10-25_05-33-50
  done: false
  episode_len_mean: 252.17
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.52169999999999
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 3980
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.0231017543209924
          entropy_coeff: 0.009999999999999998
          kl: 0.007464208373477514
          policy_loss: -0.11818106265531646
          total_loss: -0.11386707896987597
          vf_explained_var: 0.42835649847984314
          vf_loss: 0.014306845246917672
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_trained: 10920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1092,46956.6,1092000,-2.5217,-2.09,-4.09,252.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-10-25_05-34-36
  done: false
  episode_len_mean: 248.96
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.489599999999991
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 3984
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.0438584400547875
          entropy_coeff: 0.009999999999999998
          kl: 0.008801318770363449
          policy_loss: -0.11138695478439331
          total_loss: -0.10800507648123635
          vf_explained_var: 0.4702218770980835
          vf_loss: 0.013539648407863246
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_trained: 1093

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1093,47002,1093000,-2.4896,-2.09,-3.36,248.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-10-25_05-35-23
  done: false
  episode_len_mean: 246.47
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.464699999999991
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 5
  episodes_total: 3989
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.0906140274471707
          entropy_coeff: 0.009999999999999998
          kl: 0.032172284984614344
          policy_loss: -0.04412845787074831
          total_loss: -0.041451084696584276
          vf_explained_var: 0.5154848694801331
          vf_loss: 0.012557024250013961
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1094,47049.3,1094000,-2.4647,-2.09,-3.2,246.47




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-10-25_05-36-27
  done: false
  episode_len_mean: 244.78
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.447799999999992
  episode_reward_min: -3.0799999999999783
  episodes_this_iter: 4
  episodes_total: 3993
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 1.0799124876658122
          entropy_coeff: 0.009999999999999998
          kl: 0.013515989385788055
          policy_loss: 0.009710589713520474
          total_loss: 0.007955769532256657
          vf_explained_var: 0.5538610219955444
          vf_loss: 0.008397442107606266
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1095,47113.5,1095000,-2.4478,-2.09,-3.08,244.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-10-25_05-37-14
  done: false
  episode_len_mean: 243.74
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.437399999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 3997
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 0.9855241828494602
          entropy_coeff: 0.009999999999999998
          kl: 0.018887499671800218
          policy_loss: 0.019364713546302585
          total_loss: 0.020183367282152177
          vf_explained_var: 0.3876754343509674
          vf_loss: 0.009769957874798112
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1096,47159.9,1096000,-2.4374,-2.09,-2.99,243.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-10-25_05-37-59
  done: false
  episode_len_mean: 244.25
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.442499999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4001
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 1.0654836283789741
          entropy_coeff: 0.009999999999999998
          kl: 0.015563743852369466
          policy_loss: 0.020365359634160994
          total_loss: 0.020368877136045033
          vf_explained_var: 0.20241519808769226
          vf_loss: 0.009913490433245898
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1097,47204.8,1097000,-2.4425,-2.09,-2.99,244.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-10-25_05-38-44
  done: false
  episode_len_mean: 244.65
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.446499999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4005
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 1.0626482374138302
          entropy_coeff: 0.009999999999999998
          kl: 0.009039330751707078
          policy_loss: 0.016910757952266268
          total_loss: 0.017214109169112313
          vf_explained_var: 0.1500859260559082
          vf_loss: 0.010497219363848368
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1098,47250.6,1098000,-2.4465,-2.09,-2.99,244.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-10-25_05-39-28
  done: false
  episode_len_mean: 244.78
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4477999999999915
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4009
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 1.2362913264168633
          entropy_coeff: 0.009999999999999998
          kl: 0.07898287225622166
          policy_loss: 0.018590239849355487
          total_loss: 0.020348151524861652
          vf_explained_var: 0.14528657495975494
          vf_loss: 0.010340780475073391
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1099,47294.2,1099000,-2.4478,-2.09,-2.99,244.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-10-25_05-40-13
  done: false
  episode_len_mean: 245.14
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.451399999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4013
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 0.8838081333372328
          entropy_coeff: 0.009999999999999998
          kl: 0.017495178234335797
          policy_loss: 0.01981248197456201
          total_loss: 0.024264815822243692
          vf_explained_var: 0.08477126806974411
          vf_loss: 0.01203446335469683
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trained: 1100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1100,47339.4,1100000,-2.4514,-2.09,-2.99,245.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-10-25_05-40-55
  done: false
  episode_len_mean: 246.22
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4621999999999913
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4017
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.0512010269694858
          entropy_coeff: 0.009999999999999998
          kl: 0.01905007011474614
          policy_loss: 0.020770463347434997
          total_loss: 0.023264567140075895
          vf_explained_var: 0.26369529962539673
          vf_loss: 0.011638539139595297
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1101,47380.8,1101000,-2.4622,-2.09,-2.99,246.22




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-10-25_05-42-03
  done: false
  episode_len_mean: 245.68
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4567999999999914
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4021
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 0.6009450601206885
          entropy_coeff: 0.009999999999999998
          kl: 0.004655227815003116
          policy_loss: 0.035829747799370024
          total_loss: 0.039484577294852996
          vf_explained_var: 0.07320665568113327
          vf_loss: 0.009330088266223256
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1102,47448.9,1102000,-2.4568,-2.01,-2.99,245.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-10-25_05-42-51
  done: false
  episode_len_mean: 245.4
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4539999999999917
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 5
  episodes_total: 4026
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03589426241222636
          cur_lr: 5.000000000000001e-05
          entropy: 0.7124571535322402
          entropy_coeff: 0.009999999999999998
          kl: 0.015022646404716827
          policy_loss: -0.043988746073510915
          total_loss: -0.03689830054839452
          vf_explained_var: 0.22025205194950104
          vf_loss: 0.013675788161344826
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1103,47497.2,1103000,-2.454,-2.01,-2.99,245.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-10-25_05-43-38
  done: false
  episode_len_mean: 245.52
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4551999999999916
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 4030
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03589426241222636
          cur_lr: 5.000000000000001e-05
          entropy: 1.1544440812534755
          entropy_coeff: 0.009999999999999998
          kl: 0.02870954692525108
          policy_loss: 0.038120313982168835
          total_loss: 0.03912079988254441
          vf_explained_var: 0.3910854458808899
          vf_loss: 0.011514418727407853
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_trained: 11040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1104,47544.5,1104000,-2.4552,-2.01,-2.99,245.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-10-25_05-44-17
  done: false
  episode_len_mean: 247.25
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.472499999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 4033
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05384139361833953
          cur_lr: 5.000000000000001e-05
          entropy: 1.2713192966249254
          entropy_coeff: 0.009999999999999998
          kl: 0.04305303179363097
          policy_loss: -0.036470538129409154
          total_loss: -0.03721381119555897
          vf_explained_var: 0.23297514021396637
          vf_loss: 0.00965188274729169
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_trained: 1105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1105,47583.5,1105000,-2.4725,-2.01,-3.26,247.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-10-25_05-44-51
  done: false
  episode_len_mean: 250.25
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.50249999999999
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 3
  episodes_total: 4036
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08076209042750926
          cur_lr: 5.000000000000001e-05
          entropy: 1.4914012829462686
          entropy_coeff: 0.009999999999999998
          kl: 0.03405267991092447
          policy_loss: 0.008339206874370574
          total_loss: 0.0064156555467181735
          vf_explained_var: 0.16579024493694305
          vf_loss: 0.010240294299243639
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_trained: 11060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1106,47617.4,1106000,-2.5025,-2.01,-3.71,250.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-10-25_05-45-31
  done: false
  episode_len_mean: 252.68
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.52679999999999
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4039
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12114313564126393
          cur_lr: 5.000000000000001e-05
          entropy: 1.5723388142055936
          entropy_coeff: 0.009999999999999998
          kl: 0.01805513206766235
          policy_loss: -0.0060065106385284
          total_loss: -0.01047361037797398
          vf_explained_var: 0.19831816852092743
          vf_loss: 0.009069034937096553
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_trained: 110700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1107,47656.7,1107000,-2.5268,-2.01,-3.78,252.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-10-25_05-46-13
  done: false
  episode_len_mean: 253.4
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5339999999999896
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4043
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12114313564126393
          cur_lr: 5.000000000000001e-05
          entropy: 1.0111754920747544
          entropy_coeff: 0.009999999999999998
          kl: 0.012494915920238015
          policy_loss: -0.008082932233810425
          total_loss: -0.007678362396028307
          vf_explained_var: 0.22446611523628235
          vf_loss: 0.009002648174969687
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1108,47699.1,1108000,-2.534,-2.01,-3.78,253.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-10-25_05-47-00
  done: false
  episode_len_mean: 253.07
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5306999999999897
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4047
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12114313564126393
          cur_lr: 5.000000000000001e-05
          entropy: 0.5941387083795335
          entropy_coeff: 0.009999999999999998
          kl: 0.004918491542888464
          policy_loss: -0.11617029830813408
          total_loss: -0.10384976342320443
          vf_explained_var: 0.09863732755184174
          vf_loss: 0.017666081132160293
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1109,47745.4,1109000,-2.5307,-2.01,-3.78,253.07




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-10-25_05-48-04
  done: false
  episode_len_mean: 252.63
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.52629999999999
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4052
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.7226180712381999
          entropy_coeff: 0.009999999999999998
          kl: 0.007027708519447155
          policy_loss: -0.007948428889115651
          total_loss: 0.001719490107562807
          vf_explained_var: 0.15607741475105286
          vf_loss: 0.016468421638839773
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1110,47810.2,1110000,-2.5263,-2.01,-3.78,252.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-10-25_05-48-53
  done: false
  episode_len_mean: 251.83
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5182999999999898
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4056
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.6767628464433882
          entropy_coeff: 0.009999999999999998
          kl: 0.008191310705861099
          policy_loss: 0.011637369791666666
          total_loss: 0.018107915504111186
          vf_explained_var: 0.14274722337722778
          vf_loss: 0.012742013360063234
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1111,47858.7,1111000,-2.5183,-2.01,-3.78,251.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-10-25_05-49-40
  done: false
  episode_len_mean: 251.74
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5173999999999896
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4060
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.7974992235501607
          entropy_coeff: 0.009999999999999998
          kl: 0.011195143193496746
          policy_loss: -0.07558599627680249
          total_loss: -0.06973662235670619
          vf_explained_var: 0.15877805650234222
          vf_loss: 0.013146257576429182
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1112,47905.9,1112000,-2.5174,-2.01,-3.78,251.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-10-25_05-50-29
  done: false
  episode_len_mean: 250.56
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5055999999999905
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4065
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.7374741911888123
          entropy_coeff: 0.009999999999999998
          kl: 0.006656969397243747
          policy_loss: -0.025414302282863194
          total_loss: -0.015819003681341808
          vf_explained_var: 0.17201003432273865
          vf_loss: 0.016566816065460444
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1113,47954.3,1113000,-2.5056,-2.01,-3.78,250.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-10-25_05-51-17
  done: false
  episode_len_mean: 250.2
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.5019999999999905
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4069
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.7352301081021627
          entropy_coeff: 0.009999999999999998
          kl: 0.006190803958540493
          policy_loss: 0.04926679266823663
          total_loss: 0.05379181206226349
          vf_explained_var: 0.167555034160614
          vf_loss: 0.01150233242660761
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_trained: 1114000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1114,48002.4,1114000,-2.502,-2.01,-3.78,250.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-10-25_05-52-05
  done: false
  episode_len_mean: 249.16
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4915999999999907
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4073
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.060571567820631965
          cur_lr: 5.000000000000001e-05
          entropy: 0.6378703971703847
          entropy_coeff: 0.009999999999999998
          kl: 0.003224538326091617
          policy_loss: 0.023763447834385765
          total_loss: 0.03036144168840514
          vf_explained_var: 0.10086598992347717
          vf_loss: 0.01278138142079115
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1115,48050.7,1115000,-2.4916,-2.01,-3.78,249.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-10-25_05-52-53
  done: false
  episode_len_mean: 247.24
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4723999999999906
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4078
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030285783910315982
          cur_lr: 5.000000000000001e-05
          entropy: 0.6742380208439297
          entropy_coeff: 0.009999999999999998
          kl: 0.009938596485590444
          policy_loss: -0.025588534937964546
          total_loss: -0.014620298229985767
          vf_explained_var: 0.10579211264848709
          vf_loss: 0.017409621023883423
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1116,48099.2,1116000,-2.4724,-2.01,-3.78,247.24




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-10-25_05-54-00
  done: false
  episode_len_mean: 246.27
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4626999999999915
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4082
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.030285783910315982
          cur_lr: 5.000000000000001e-05
          entropy: 0.7340957085291545
          entropy_coeff: 0.009999999999999998
          kl: 0.004369693903546571
          policy_loss: 0.025415993399090238
          total_loss: 0.031502876016828746
          vf_explained_var: 0.13216105103492737
          vf_loss: 0.013295498955994845
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1117,48165.3,1117000,-2.4627,-2.01,-3.78,246.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-10-25_05-54-49
  done: false
  episode_len_mean: 245.35
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4534999999999916
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4086
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.7026518483956655
          entropy_coeff: 0.009999999999999998
          kl: 0.005483048854123101
          policy_loss: -0.0783739249739382
          total_loss: -0.07172043017215199
          vf_explained_var: 0.12922252714633942
          vf_loss: 0.013596985375301705
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1118,48214.4,1118000,-2.4535,-2.01,-3.78,245.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-10-25_05-55-38
  done: false
  episode_len_mean: 245.17
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4516999999999918
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4091
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.7682644049326579
          entropy_coeff: 0.009999999999999998
          kl: 0.009692280841494177
          policy_loss: -0.017713969863123364
          total_loss: -0.00801006125079261
          vf_explained_var: 0.15347683429718018
          vf_loss: 0.017239784645951457
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1119,48263.5,1119000,-2.4517,-2.01,-3.78,245.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-10-25_05-56-27
  done: false
  episode_len_mean: 244.62
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4461999999999917
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4095
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.8430749641524421
          entropy_coeff: 0.009999999999999998
          kl: 0.006972342996937818
          policy_loss: 0.019204075717263753
          total_loss: 0.02407864775094721
          vf_explained_var: 0.17713773250579834
          vf_loss: 0.013199737688733473
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1120,48312.9,1120000,-2.4462,-2.01,-3.78,244.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-10-25_05-57-17
  done: false
  episode_len_mean: 244.0
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4399999999999915
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4099
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.8227487153477139
          entropy_coeff: 0.009999999999999998
          kl: 0.0071265196298927695
          policy_loss: -0.007959141499466367
          total_loss: -0.0027450654241773817
          vf_explained_var: 0.14342324435710907
          vf_loss: 0.013333646073523496
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1121,48362.1,1121000,-2.44,-2.01,-3.78,244


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-10-25_05-58-05
  done: false
  episode_len_mean: 243.01
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4300999999999915
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4104
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.8086566719743941
          entropy_coeff: 0.009999999999999998
          kl: 0.014287070373451468
          policy_loss: -0.02588444443212615
          total_loss: -0.017410420212480758
          vf_explained_var: 0.2077171802520752
          vf_loss: 0.01634423935578929
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1122,48410.1,1122000,-2.4301,-2.01,-3.78,243.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-10-25_05-58-52
  done: false
  episode_len_mean: 242.43
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4242999999999917
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4108
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.7843461712201436
          entropy_coeff: 0.009999999999999998
          kl: 0.006477544628220539
          policy_loss: 0.026989403035905628
          total_loss: 0.03083505067560408
          vf_explained_var: 0.22111795842647552
          vf_loss: 0.01159101975046926
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trained: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1123,48457.4,1123000,-2.4243,-2.01,-3.78,242.43




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-10-25_05-59-59
  done: false
  episode_len_mean: 241.93
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4192999999999922
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4112
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 0.9375204000208113
          entropy_coeff: 0.009999999999999998
          kl: 0.007866993029165315
          policy_loss: 0.003591599025660091
          total_loss: 0.007215615941418542
          vf_explained_var: 0.3006683886051178
          vf_loss: 0.01288009098627501
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_trained: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1124,48524.2,1124000,-2.4193,-2.01,-3.78,241.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-10-25_06-00-44
  done: false
  episode_len_mean: 241.5
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.414999999999992
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4116
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015142891955157991
          cur_lr: 5.000000000000001e-05
          entropy: 1.1916280342472925
          entropy_coeff: 0.009999999999999998
          kl: 0.023883708825280316
          policy_loss: -0.005511912330985069
          total_loss: -0.004809328913688659
          vf_explained_var: 0.33441129326820374
          vf_loss: 0.012257198378857638
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1125,48569.3,1125000,-2.415,-2.01,-3.78,241.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-10-25_06-01-31
  done: false
  episode_len_mean: 241.96
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.419599999999992
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4120
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.7803697678777907
          entropy_coeff: 0.009999999999999998
          kl: 0.00561747369980682
          policy_loss: -0.0017303319440947638
          total_loss: 0.002920280024409294
          vf_explained_var: 0.27058443427085876
          vf_loss: 0.012326710029608674
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1126,48616.2,1126000,-2.4196,-2.02,-3.78,241.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-10-25_06-02-19
  done: false
  episode_len_mean: 242.32
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.423199999999992
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 5
  episodes_total: 4125
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.8620388944943745
          entropy_coeff: 0.009999999999999998
          kl: 0.007109191965754588
          policy_loss: -0.027828399216135342
          total_loss: -0.02083713254994816
          vf_explained_var: 0.31047335267066956
          vf_loss: 0.01545017497200105
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1127,48664.8,1127000,-2.4232,-2.02,-3.78,242.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-10-25_06-03-05
  done: false
  episode_len_mean: 242.8
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.4279999999999924
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4129
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 1.061268186569214
          entropy_coeff: 0.009999999999999998
          kl: 0.017435607572117026
          policy_loss: -0.0193641624516911
          total_loss: -0.018546867784526612
          vf_explained_var: 0.45911768078804016
          vf_loss: 0.011033935226603515
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_trained: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1128,48710.8,1128000,-2.428,-2.02,-3.78,242.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-10-25_06-03-53
  done: false
  episode_len_mean: 241.48
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.414799999999993
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4133
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 1.0747370938460032
          entropy_coeff: 0.009999999999999998
          kl: 0.01259792297267936
          policy_loss: -0.019008460144201916
          total_loss: -0.018546994609965218
          vf_explained_var: 0.44034215807914734
          vf_loss: 0.010922682647489839
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1129,48757.9,1129000,-2.4148,-2.02,-3.78,241.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-10-25_06-04-40
  done: false
  episode_len_mean: 236.91
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.3690999999999938
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 4
  episodes_total: 4137
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.7149281740188599
          entropy_coeff: 0.009999999999999998
          kl: 0.008438334261064757
          policy_loss: 0.023177444438139596
          total_loss: 0.026819287654426364
          vf_explained_var: 0.256857693195343
          vf_loss: 0.010599455589221584
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_trained: 1130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1130,48805,1130000,-2.3691,-2.02,-3.18,236.91




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-10-25_06-05-46
  done: false
  episode_len_mean: 235.04
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.350399999999994
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 4141
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.6176607231299083
          entropy_coeff: 0.009999999999999998
          kl: 0.005101234367353413
          policy_loss: -0.016211564590533575
          total_loss: -0.010014018540581067
          vf_explained_var: 0.20197491347789764
          vf_loss: 0.012258283918102582
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1131,48871.1,1131000,-2.3504,-2.02,-3.14,235.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-10-25_06-06-34
  done: false
  episode_len_mean: 234.46
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.344599999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 4146
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.7949435101615058
          entropy_coeff: 0.009999999999999998
          kl: 0.009811478852635799
          policy_loss: -0.02306173774931166
          total_loss: -0.01593707021739748
          vf_explained_var: 0.2586574852466583
          vf_loss: 0.014851240472247203
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_steps_trained: 1132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1132,48918.8,1132000,-2.3446,-2.02,-2.95,234.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-10-25_06-07-21
  done: false
  episode_len_mean: 234.84
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.3483999999999936
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4150
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.9707430554760828
          entropy_coeff: 0.009999999999999998
          kl: 0.008722989521335479
          policy_loss: 0.00525674136976401
          total_loss: 0.007178386176625888
          vf_explained_var: 0.3399792015552521
          vf_loss: 0.011430938883374134
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_trained: 1133

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1133,48966.3,1133000,-2.3484,-2.02,-2.95,234.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-10-25_06-08-07
  done: false
  episode_len_mean: 235.3
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.352999999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4154
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 1.0286848300033145
          entropy_coeff: 0.009999999999999998
          kl: 0.009079081624198201
          policy_loss: 0.03461129135555691
          total_loss: 0.035051323639021977
          vf_explained_var: 0.33902889490127563
          vf_loss: 0.010520657653816871
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trained: 11340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1134,49011.7,1134000,-2.353,-2.02,-2.95,235.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-10-25_06-08-55
  done: false
  episode_len_mean: 235.31
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.353099999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4158
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.8652426189846463
          entropy_coeff: 0.009999999999999998
          kl: 0.009971163169968298
          policy_loss: 0.04922089825073878
          total_loss: 0.0502669483423233
          vf_explained_var: 0.3336583077907562
          vf_loss: 0.009471988222665257
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_trained: 1135000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1135,49059.9,1135000,-2.3531,-2.02,-2.95,235.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-10-25_06-09-43
  done: false
  episode_len_mean: 235.42
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.354199999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4162
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.775045856502321
          entropy_coeff: 0.009999999999999998
          kl: 0.006761913207198589
          policy_loss: -0.011030744678444332
          total_loss: -0.005646541217962901
          vf_explained_var: 0.20626653730869293
          vf_loss: 0.012981069191462464
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1136,49108.1,1136000,-2.3542,-2.02,-2.95,235.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-10-25_06-10-31
  done: false
  episode_len_mean: 235.22
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.3521999999999936
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 4167
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.8393574800756243
          entropy_coeff: 0.009999999999999998
          kl: 0.008412751934771891
          policy_loss: -0.026645648976167044
          total_loss: -0.01866717686255773
          vf_explained_var: 0.1723737269639969
          vf_loss: 0.016180959581914876
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1137,49156.1,1137000,-2.3522,-2.02,-2.95,235.22




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-10-25_06-11-36
  done: false
  episode_len_mean: 235.01
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.350099999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4171
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.8386262151930067
          entropy_coeff: 0.009999999999999998
          kl: 0.008574582107120124
          policy_loss: 0.03404262558453613
          total_loss: 0.03767820960945553
          vf_explained_var: 0.18101045489311218
          vf_loss: 0.01182707789250546
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained: 113800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1138,49221,1138000,-2.3501,-2.02,-2.95,235.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-10-25_06-12-25
  done: false
  episode_len_mean: 235.21
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.3520999999999934
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4175
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022714337932736983
          cur_lr: 5.000000000000001e-05
          entropy: 0.8113932907581329
          entropy_coeff: 0.009999999999999998
          kl: 0.004903447904591
          policy_loss: -0.012032898101541732
          total_loss: -0.007231237573756112
          vf_explained_var: 0.17602084577083588
          vf_loss: 0.012804214418348338
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1139,49270.1,1139000,-2.3521,-2.02,-2.95,235.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-10-25_06-13-14
  done: false
  episode_len_mean: 235.48
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.354799999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 4180
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9359502110216352
          entropy_coeff: 0.009999999999999998
          kl: 0.013232606606627999
          policy_loss: -0.026819209671682782
          total_loss: -0.020110106468200682
          vf_explained_var: 0.1916176676750183
          vf_loss: 0.01591831940329737
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1140,49318.9,1140000,-2.3548,-2.03,-2.95,235.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-10-25_06-14-01
  done: false
  episode_len_mean: 235.91
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3590999999999935
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4184
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 1.0083800666862064
          entropy_coeff: 0.009999999999999998
          kl: 0.0063401214442905815
          policy_loss: 0.022949798115425638
          total_loss: 0.025207626654042137
          vf_explained_var: 0.21245399117469788
          vf_loss: 0.012269620949195491
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1141,49366,1141000,-2.3591,-2.03,-2.95,235.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-10-25_06-14-48
  done: false
  episode_len_mean: 236.28
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.362799999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4188
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 1.0306947078969744
          entropy_coeff: 0.009999999999999998
          kl: 0.010601397025054998
          policy_loss: 0.020580288850598864
          total_loss: 0.022843707766797807
          vf_explained_var: 0.2665292024612427
          vf_loss: 0.012449964001360867
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1142,49413.1,1142000,-2.3628,-2.03,-2.95,236.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-10-25_06-15-35
  done: false
  episode_len_mean: 236.88
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.368799999999993
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4192
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9413394398159451
          entropy_coeff: 0.009999999999999998
          kl: 0.008638070025573876
          policy_loss: 0.02322403337392542
          total_loss: 0.026528236083686353
          vf_explained_var: 0.25735506415367126
          vf_loss: 0.012619490176439286
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1143,49460.3,1143000,-2.3688,-2.03,-2.95,236.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-10-25_06-16-23
  done: false
  episode_len_mean: 237.04
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3703999999999934
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4196
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9084414945708381
          entropy_coeff: 0.009999999999999998
          kl: 0.007977494975281295
          policy_loss: -0.003772105814682113
          total_loss: 5.7973215977350874e-05
          vf_explained_var: 0.23793615400791168
          vf_loss: 0.012823890149593353
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1144,49507.8,1144000,-2.3704,-2.03,-2.95,237.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-10-25_06-17-29
  done: false
  episode_len_mean: 236.93
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3692999999999937
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 4201
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.963139334652159
          entropy_coeff: 0.009999999999999998
          kl: 0.00963987355090177
          policy_loss: -0.04155074101355341
          total_loss: -0.03523793849680159
          vf_explained_var: 0.27900221943855286
          vf_loss: 0.015834715600228973
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1145,49573.5,1145000,-2.3693,-2.03,-2.95,236.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-10-25_06-18-14
  done: false
  episode_len_mean: 237.48
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.374799999999993
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4205
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9931738740868039
          entropy_coeff: 0.009999999999999998
          kl: 0.008951317142200526
          policy_loss: -0.0013255628446737925
          total_loss: 0.0003602073424392276
          vf_explained_var: 0.36099034547805786
          vf_loss: 0.011515848100599316
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1146,49618.8,1146000,-2.3748,-2.03,-2.95,237.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-10-25_06-19-01
  done: false
  episode_len_mean: 237.95
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3794999999999935
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4209
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9143741250038147
          entropy_coeff: 0.009999999999999998
          kl: 0.008704229901576434
          policy_loss: 0.03917532795005375
          total_loss: 0.039721374793185125
          vf_explained_var: 0.3613041043281555
          vf_loss: 0.009590933145955206
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1147,49666.2,1147000,-2.3795,-2.03,-2.95,237.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-10-25_06-19-49
  done: false
  episode_len_mean: 238.18
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.381799999999993
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4213
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.8358497573269739
          entropy_coeff: 0.009999999999999998
          kl: 0.0069359447326788155
          policy_loss: 0.03337633493873808
          total_loss: 0.038096428331401616
          vf_explained_var: 0.26573261618614197
          vf_loss: 0.012999820481571887
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1148,49713.3,1148000,-2.3818,-2.03,-2.95,238.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-10-25_06-20-36
  done: false
  episode_len_mean: 237.86
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.378599999999993
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4217
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.7203302337063684
          entropy_coeff: 0.009999999999999998
          kl: 0.00696635515039969
          policy_loss: 0.02409987019168006
          total_loss: 0.030118725614415275
          vf_explained_var: 0.19468849897384644
          vf_loss: 0.01314303855308228
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_trained: 11490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1149,49760.9,1149000,-2.3786,-2.03,-2.95,237.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-10-25_06-21-24
  done: false
  episode_len_mean: 237.65
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3764999999999934
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 4221
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.6186984241008758
          entropy_coeff: 0.009999999999999998
          kl: 0.006822638420814068
          policy_loss: -0.05542285318175952
          total_loss: -0.04829150935014089
          vf_explained_var: 0.13794152438640594
          vf_loss: 0.013240843173116445
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1150,49808.9,1150000,-2.3765,-2.03,-2.95,237.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-10-25_06-22-12
  done: false
  episode_len_mean: 237.11
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.371099999999993
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 4226
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.711753746536043
          entropy_coeff: 0.009999999999999998
          kl: 0.013747168518023973
          policy_loss: -0.02036349798242251
          total_loss: -0.01074652878774537
          vf_explained_var: 0.16682083904743195
          vf_loss: 0.016578379107846153
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1151,49856.3,1151000,-2.3711,-2.03,-2.85,237.11




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-10-25_06-23-16
  done: false
  episode_len_mean: 236.61
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3660999999999937
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 4230
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.8279358983039856
          entropy_coeff: 0.009999999999999998
          kl: 0.011476066212875778
          policy_loss: 0.011730433172649808
          total_loss: 0.015347300966580709
          vf_explained_var: 0.2677404284477234
          vf_loss: 0.011765892648448547
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1152,49920.2,1152000,-2.3661,-2.03,-2.85,236.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-10-25_06-24-03
  done: false
  episode_len_mean: 236.68
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3667999999999934
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 4234
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9808789650599162
          entropy_coeff: 0.009999999999999998
          kl: 0.01841890360812652
          policy_loss: 0.0390874600244893
          total_loss: 0.04105132867892583
          vf_explained_var: 0.32363057136535645
          vf_loss: 0.011563470928619306
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_trained: 11530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1153,49967.5,1153000,-2.3668,-2.03,-2.85,236.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-10-25_06-24-47
  done: false
  episode_len_mean: 237.49
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.374899999999993
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 4238
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011357168966368491
          cur_lr: 5.000000000000001e-05
          entropy: 0.9812058481905196
          entropy_coeff: 0.009999999999999998
          kl: 0.02226406334983769
          policy_loss: 0.0019974765678246816
          total_loss: 0.005130189574427075
          vf_explained_var: 0.36336132884025574
          vf_loss: 0.012691916452927722
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1154,50011.7,1154000,-2.3749,-2.03,-2.85,237.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-10-25_06-25-31
  done: false
  episode_len_mean: 239.4
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3939999999999926
  episode_reward_min: -2.959999999999981
  episodes_this_iter: 3
  episodes_total: 4241
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01703575344955274
          cur_lr: 5.000000000000001e-05
          entropy: 1.2469793306456671
          entropy_coeff: 0.009999999999999998
          kl: 0.023066788085043498
          policy_loss: -0.12423661301533381
          total_loss: -0.12454503840870328
          vf_explained_var: 0.5006821751594543
          vf_loss: 0.011768411389655536
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained: 1155

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1155,50055.9,1155000,-2.394,-2.03,-2.96,239.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-10-25_06-26-15
  done: false
  episode_len_mean: 240.7
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4069999999999925
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 4245
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025553630174329104
          cur_lr: 5.000000000000001e-05
          entropy: 1.2934355841742622
          entropy_coeff: 0.009999999999999998
          kl: 0.028597467286037355
          policy_loss: 0.00300251262055503
          total_loss: -7.375329732894897e-06
          vf_explained_var: 0.6297714114189148
          vf_loss: 0.009193697174002107
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1156,50099.1,1156000,-2.407,-2.03,-2.97,240.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-10-25_06-26-58
  done: false
  episode_len_mean: 242.03
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.420299999999992
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 4249
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03833044526149366
          cur_lr: 5.000000000000001e-05
          entropy: 1.3182078361511231
          entropy_coeff: 0.009999999999999998
          kl: 0.020405711670113923
          policy_loss: 0.02177848509616322
          total_loss: 0.020734099629852506
          vf_explained_var: 0.5597313642501831
          vf_loss: 0.011355531795157327
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_trained: 1157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1157,50142.1,1157000,-2.4203,-2.03,-3.02,242.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-10-25_06-27-42
  done: false
  episode_len_mean: 243.16
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.431599999999992
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 4253
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.120814945962694
          entropy_coeff: 0.009999999999999998
          kl: 0.011359645508943809
          policy_loss: 0.01612049374315474
          total_loss: 0.01626360027326478
          vf_explained_var: 0.4521773159503937
          vf_loss: 0.010698119690641762
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained: 115800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1158,50186.9,1158000,-2.4316,-2.03,-3.02,243.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-10-25_06-28-26
  done: false
  episode_len_mean: 243.8
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4379999999999917
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 4256
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.3347329563564725
          entropy_coeff: 0.009999999999999998
          kl: 0.01577723354533958
          policy_loss: -0.034513239314158756
          total_loss: -0.038374736656745276
          vf_explained_var: 0.5095061659812927
          vf_loss: 0.008578709920402616
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1159,50230.5,1159000,-2.438,-2.03,-3.44,243.8




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-10-25_06-29-30
  done: false
  episode_len_mean: 243.98
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.439799999999992
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 5
  episodes_total: 4261
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.0767296334107717
          entropy_coeff: 0.009999999999999998
          kl: 0.011067530698879184
          policy_loss: -0.041984049768911465
          total_loss: -0.03764372459716267
          vf_explained_var: 0.43626898527145386
          vf_loss: 0.014471284931318627
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1160,50294.9,1160000,-2.4398,-2.03,-3.44,243.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-10-25_06-30-18
  done: false
  episode_len_mean: 244.52
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4451999999999914
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4265
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.1034921162658267
          entropy_coeff: 0.009999999999999998
          kl: 0.010520065658063989
          policy_loss: -0.008720027572578854
          total_loss: -0.008444170405467352
          vf_explained_var: 0.5477696061134338
          vf_loss: 0.010705918135742346
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1161,50342.7,1161000,-2.4452,-2.03,-3.44,244.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-10-25_06-31-05
  done: false
  episode_len_mean: 244.94
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.449399999999992
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4269
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.9941748711797926
          entropy_coeff: 0.009999999999999998
          kl: 0.010721734586124635
          policy_loss: 0.0169421701795525
          total_loss: 0.018278158042165968
          vf_explained_var: 0.32537201046943665
          vf_loss: 0.010661286322606935
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained: 1162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1162,50389,1162000,-2.4494,-2.03,-3.44,244.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-10-25_06-31-53
  done: false
  episode_len_mean: 245.34
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.453399999999992
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4273
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.945216382212109
          entropy_coeff: 0.009999999999999998
          kl: 0.010005300847845433
          policy_loss: 0.039339994349413446
          total_loss: 0.041152611995736756
          vf_explained_var: 0.2662264406681061
          vf_loss: 0.010689519200887945
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained: 1163000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1163,50437.3,1163000,-2.4534,-2.06,-3.44,245.34


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-10-25_06-32-41
  done: false
  episode_len_mean: 245.68
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4567999999999914
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4277
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.8729346017042796
          entropy_coeff: 0.009999999999999998
          kl: 0.013315871943656058
          policy_loss: 0.01336393298374282
          total_loss: 0.017109975715478263
          vf_explained_var: 0.21280503273010254
          vf_loss: 0.01170978539933761
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_trained: 1164000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1164,50485.7,1164000,-2.4568,-2.06,-3.44,245.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-10-25_06-33-30
  done: false
  episode_len_mean: 245.74
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4573999999999914
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4281
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.8867987030082278
          entropy_coeff: 0.009999999999999998
          kl: 0.006197244320257787
          policy_loss: -0.028353092115786342
          total_loss: -0.024197189095947477
          vf_explained_var: 0.1864163875579834
          vf_loss: 0.012667574836975998
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_trained: 1165000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1165,50534,1165000,-2.4574,-2.06,-3.44,245.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-10-25_06-34-17
  done: false
  episode_len_mean: 245.45
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.454499999999992
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 5
  episodes_total: 4286
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.8930350151326921
          entropy_coeff: 0.009999999999999998
          kl: 0.013353962302408541
          policy_loss: -0.022228760520617168
          total_loss: -0.014601092868381077
          vf_explained_var: 0.1901644617319107
          vf_loss: 0.015790224230537813
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained: 1166000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1166,50581.5,1166000,-2.4545,-2.06,-3.44,245.45




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-10-25_06-35-22
  done: false
  episode_len_mean: 245.12
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4511999999999916
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4290
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.0072015709347195
          entropy_coeff: 0.009999999999999998
          kl: 0.009461830367790865
          policy_loss: 0.011076846139298544
          total_loss: 0.01372697576880455
          vf_explained_var: 0.2728099524974823
          vf_loss: 0.012178132434686025
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1167,50646.1,1167000,-2.4512,-2.05,-3.44,245.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-10-25_06-36-09
  done: false
  episode_len_mean: 245.49
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4548999999999914
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4294
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.0097759955459171
          entropy_coeff: 0.009999999999999998
          kl: 0.009665250958278351
          policy_loss: 0.03918568810655011
          total_loss: 0.039763546155558695
          vf_explained_var: 0.3354528248310089
          vf_loss: 0.010119908147801955
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1168,50693.4,1168000,-2.4549,-2.05,-3.44,245.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-10-25_06-36-57
  done: false
  episode_len_mean: 245.48
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.454799999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4298
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.8798622237311469
          entropy_coeff: 0.009999999999999998
          kl: 0.006781177240622406
          policy_loss: 0.03505806210968229
          total_loss: 0.038597997857464685
          vf_explained_var: 0.19885168969631195
          vf_loss: 0.011948668056478103
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1169,50741,1169000,-2.4548,-2.05,-3.44,245.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-10-25_06-37-41
  done: false
  episode_len_mean: 245.94
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4593999999999916
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4302
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.9717734568648868
          entropy_coeff: 0.009999999999999998
          kl: 0.011002260841366428
          policy_loss: -0.02312616068455908
          total_loss: -0.018652258068323137
          vf_explained_var: 0.2195218801498413
          vf_loss: 0.013559052937974532
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1170,50785.7,1170000,-2.4594,-2.05,-3.44,245.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-10-25_06-38-29
  done: false
  episode_len_mean: 246.17
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.461699999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4306
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.873620002799564
          entropy_coeff: 0.009999999999999998
          kl: 0.00552176269603911
          policy_loss: -0.017123761441972522
          total_loss: -0.012151683535840776
          vf_explained_var: 0.21137790381908417
          vf_loss: 0.013390797914730178
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1171,50832.7,1171000,-2.4617,-2.05,-3.44,246.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-10-25_06-39-17
  done: false
  episode_len_mean: 246.0
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4599999999999915
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 5
  episodes_total: 4311
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 0.8724193553129832
          entropy_coeff: 0.009999999999999998
          kl: 0.006314479461185682
          policy_loss: -0.03163185570802954
          total_loss: -0.02349297772679064
          vf_explained_var: 0.21235911548137665
          vf_loss: 0.016500018619828753
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1172,50881.2,1172000,-2.46,-2.05,-3.44,246


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-10-25_06-40-04
  done: false
  episode_len_mean: 245.79
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4578999999999915
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4315
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.1283204449547661
          entropy_coeff: 0.009999999999999998
          kl: 0.010713981772925853
          policy_loss: -0.004160051461723116
          total_loss: -0.0026330683794286514
          vf_explained_var: 0.35235753655433655
          vf_loss: 0.01219417896742622
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1173,50927.8,1173000,-2.4579,-2.05,-3.44,245.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-10-25_06-40-49
  done: false
  episode_len_mean: 246.41
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4640999999999913
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4319
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.1758870588408576
          entropy_coeff: 0.009999999999999998
          kl: 0.011409862866979086
          policy_loss: -0.020603717035717433
          total_loss: -0.01969475191500452
          vf_explained_var: 0.3985564410686493
          vf_loss: 0.012011817935854197
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1174,50973.5,1174000,-2.4641,-2.05,-3.44,246.41




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-10-25_06-41-53
  done: false
  episode_len_mean: 246.55
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4654999999999916
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4323
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.137198011080424
          entropy_coeff: 0.009999999999999998
          kl: 0.010145589634510798
          policy_loss: -0.001988969991604487
          total_loss: -0.000790892955329683
          vf_explained_var: 0.40252381563186646
          vf_loss: 0.01198672992694709
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1175,51037,1175000,-2.4655,-2.05,-3.44,246.55


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-10-25_06-42-38
  done: false
  episode_len_mean: 247.0
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.469999999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4327
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.0899988114833832
          entropy_coeff: 0.009999999999999998
          kl: 0.008645363399005469
          policy_loss: 0.03952398788597849
          total_loss: 0.040563423186540604
          vf_explained_var: 0.3571273684501648
          vf_loss: 0.011442351010110643
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_trained: 11760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1176,51082.2,1176000,-2.47,-2.05,-3.44,247


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-10-25_06-43-24
  done: false
  episode_len_mean: 248.09
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.480899999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4331
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.18761860926946
          entropy_coeff: 0.009999999999999998
          kl: 0.01409217769698612
          policy_loss: -0.0045769465466340385
          total_loss: -0.003157988770140542
          vf_explained_var: 0.37226638197898865
          vf_loss: 0.0124849038819472
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained: 1177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1177,51128.2,1177000,-2.4809,-2.05,-3.44,248.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-10-25_06-44-11
  done: false
  episode_len_mean: 247.85
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.478499999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4335
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.1197945091459487
          entropy_coeff: 0.009999999999999998
          kl: 0.008461628385233174
          policy_loss: 0.028449505236413743
          total_loss: 0.030289200858937368
          vf_explained_var: 0.3303159773349762
          vf_loss: 0.01255113510414958
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained: 1178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1178,51174.9,1178000,-2.4785,-2.05,-3.44,247.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-10-25_06-44-58
  done: false
  episode_len_mean: 247.15
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.471499999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4339
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05749566789224048
          cur_lr: 5.000000000000001e-05
          entropy: 1.1797787215974596
          entropy_coeff: 0.009999999999999998
          kl: 0.023748042370345602
          policy_loss: 0.028167622908949853
          total_loss: 0.030296272080805566
          vf_explained_var: 0.29458141326904297
          vf_loss: 0.012561026153465112
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1179,51221.7,1179000,-2.4715,-2.05,-3.44,247.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-10-25_06-45-40
  done: false
  episode_len_mean: 247.01
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.470099999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4343
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.3144675294558208
          entropy_coeff: 0.009999999999999998
          kl: 0.010751134191406051
          policy_loss: -0.004972040984365675
          total_loss: -0.004574801358911727
          vf_explained_var: 0.2860862612724304
          vf_loss: 0.012614697569774257
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1180,51264.1,1180000,-2.4701,-2.05,-3.44,247.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-10-25_06-46-23
  done: false
  episode_len_mean: 246.71
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4670999999999914
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 4346
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.2275502734714083
          entropy_coeff: 0.009999999999999998
          kl: 0.013033875484212368
          policy_loss: -0.0869640823867586
          total_loss: -0.08543012100789282
          vf_explained_var: 0.22359786927700043
          vf_loss: 0.012685373725576533
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1181,51306.7,1181000,-2.4671,-2.05,-3.44,246.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-10-25_06-47-21
  done: false
  episode_len_mean: 246.78
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.467799999999991
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 4350
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.3735609226756627
          entropy_coeff: 0.009999999999999998
          kl: 0.007701974242164529
          policy_loss: -0.006117345102959209
          total_loss: -0.006660128550397025
          vf_explained_var: 0.30977919697761536
          vf_loss: 0.01252858124466406
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1182,51365.3,1182000,-2.4678,-2.05,-3.44,246.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-10-25_06-48-06
  done: false
  episode_len_mean: 245.81
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.458099999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4354
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.2526835494571262
          entropy_coeff: 0.009999999999999998
          kl: 0.010385992268786342
          policy_loss: 0.024735352562533484
          total_loss: 0.02585964293943511
          vf_explained_var: 0.20075294375419617
          vf_loss: 0.012755402446620994
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained: 118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1183,51409.7,1183000,-2.4581,-2.05,-2.94,245.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-10-25_06-48-50
  done: false
  episode_len_mean: 246.32
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4631999999999916
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4358
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.1686930457750957
          entropy_coeff: 0.009999999999999998
          kl: 0.009166176776150949
          policy_loss: 0.010277862598498663
          total_loss: 0.012231636212931739
          vf_explained_var: 0.1981111764907837
          vf_loss: 0.012850181437614891
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1184,51453.8,1184000,-2.4632,-2.05,-2.94,246.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-10-25_06-49-34
  done: false
  episode_len_mean: 247.13
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.471299999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4362
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.1026920610004
          entropy_coeff: 0.009999999999999998
          kl: 0.010674282529115212
          policy_loss: 0.02227134042316013
          total_loss: 0.025344381233056386
          vf_explained_var: 0.13951784372329712
          vf_loss: 0.013179373420361016
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_trained: 118500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1185,51498.2,1185000,-2.4713,-2.05,-2.94,247.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-10-25_06-50-19
  done: false
  episode_len_mean: 247.12
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.471199999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4366
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.1827610717879402
          entropy_coeff: 0.009999999999999998
          kl: 0.008494899675267852
          policy_loss: 0.017861336138513352
          total_loss: 0.020334118025170433
          vf_explained_var: 0.11995117366313934
          vf_loss: 0.013567764694905943
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1186,51543.2,1186000,-2.4712,-2.05,-2.94,247.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-10-25_06-51-03
  done: false
  episode_len_mean: 247.79
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.477899999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4370
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.172662251525455
          entropy_coeff: 0.009999999999999998
          kl: 0.009306651099729122
          policy_loss: 0.011876220173305935
          total_loss: 0.014937324987517462
          vf_explained_var: 0.09754511713981628
          vf_loss: 0.013985088871171078
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_trained: 118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1187,51586.9,1187000,-2.4779,-2.05,-2.94,247.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-10-25_06-51-48
  done: false
  episode_len_mean: 248.38
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4837999999999916
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4374
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.1068158004018995
          entropy_coeff: 0.009999999999999998
          kl: 0.008826775089729072
          policy_loss: 0.024881742522120477
          total_loss: 0.027128444694810443
          vf_explained_var: 0.09013044834136963
          vf_loss: 0.012553606554865837
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1188,51631.9,1188000,-2.4838,-2.05,-2.94,248.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-10-25_06-52-33
  done: false
  episode_len_mean: 249.2
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4919999999999907
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4378
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.0968490070766872
          entropy_coeff: 0.009999999999999998
          kl: 0.012000210614050546
          policy_loss: -0.0006393957469198439
          total_loss: 0.004364862872494592
          vf_explained_var: 0.08546590059995651
          vf_loss: 0.014937806046671338
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1189,51676.4,1189000,-2.492,-2.05,-2.94,249.2




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-10-25_06-53-36
  done: false
  episode_len_mean: 249.36
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.493599999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4382
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.112999161084493
          entropy_coeff: 0.009999999999999998
          kl: 0.011701072766884145
          policy_loss: -0.010934190534883075
          total_loss: -0.007741350390844875
          vf_explained_var: 0.21477268636226654
          vf_loss: 0.013313693884346221
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1190,51739.6,1190000,-2.4936,-2.05,-2.94,249.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-10-25_06-54-21
  done: false
  episode_len_mean: 250.12
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5011999999999905
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 3
  episodes_total: 4385
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.1713625497288174
          entropy_coeff: 0.009999999999999998
          kl: 0.01629848647969286
          policy_loss: -0.10974234872394138
          total_loss: -0.105186571015252
          vf_explained_var: 0.1375676840543747
          vf_loss: 0.0148637642359568
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_trained: 1191000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1191,51784.4,1191000,-2.5012,-2.05,-2.94,250.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-10-25_06-55-06
  done: false
  episode_len_mean: 250.74
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5073999999999903
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4389
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.090049296617508
          entropy_coeff: 0.009999999999999998
          kl: 0.009283186202200788
          policy_loss: -0.11889821837345758
          total_loss: -0.11126614006029235
          vf_explained_var: 0.1560843586921692
          vf_loss: 0.017731958575960664
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1192,51829.2,1192000,-2.5074,-2.05,-2.94,250.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-10-25_06-55-50
  done: false
  episode_len_mean: 251.63
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5162999999999904
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4393
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08624350183836073
          cur_lr: 5.000000000000001e-05
          entropy: 1.3962792873382568
          entropy_coeff: 0.009999999999999998
          kl: 0.03243076668812566
          policy_loss: -0.027060765359136794
          total_loss: -0.024421365807453792
          vf_explained_var: 0.22182051837444305
          vf_loss: 0.013805251754820346
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1193,51873.4,1193000,-2.5163,-2.16,-2.94,251.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-10-25_06-56-33
  done: false
  episode_len_mean: 252.74
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5273999999999903
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4397
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.2448967357476552
          entropy_coeff: 0.009999999999999998
          kl: 0.008902745856625907
          policy_loss: 0.007592573679155773
          total_loss: 0.008824532479047775
          vf_explained_var: 0.22618745267391205
          vf_loss: 0.012529218817750614
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1194,51916.9,1194000,-2.5274,-2.16,-2.94,252.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-10-25_06-57-16
  done: false
  episode_len_mean: 253.82
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5381999999999896
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 4401
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.2867318789164226
          entropy_coeff: 0.009999999999999998
          kl: 0.009731228107066335
          policy_loss: -0.005048917979001999
          total_loss: -0.0033004101779725817
          vf_explained_var: 0.08241433650255203
          vf_loss: 0.013356941917704212
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1195,51959.3,1195000,-2.5382,-2.16,-2.94,253.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-10-25_06-57-59
  done: false
  episode_len_mean: 253.8
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5379999999999896
  episode_reward_min: -2.8799999999999826
  episodes_this_iter: 3
  episodes_total: 4404
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.1947988616095648
          entropy_coeff: 0.009999999999999998
          kl: 0.012675254318822403
          policy_loss: -0.1419655178156164
          total_loss: -0.1402168431215816
          vf_explained_var: 0.08002348244190216
          vf_loss: 0.012056926627539925
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 119600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1196,52002.2,1196000,-2.538,-2.16,-2.88,253.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-10-25_06-58-42
  done: false
  episode_len_mean: 255.6
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5559999999999894
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 4
  episodes_total: 4408
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.294308512740665
          entropy_coeff: 0.009999999999999998
          kl: 0.010046284717784222
          policy_loss: 0.01405007868177361
          total_loss: 0.016229137778282166
          vf_explained_var: 0.07504262775182724
          vf_loss: 0.013822501949552033
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained: 1197000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1197,52045.2,1197000,-2.556,-2.16,-3.18,255.6




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-10-25_06-59-38
  done: false
  episode_len_mean: 257.1
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.570999999999989
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 4
  episodes_total: 4412
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.334655029243893
          entropy_coeff: 0.009999999999999998
          kl: 0.012690332801455848
          policy_loss: 0.004602561021844546
          total_loss: 0.006535130904780494
          vf_explained_var: 0.12626416981220245
          vf_loss: 0.013637435705297523
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_trained: 1198000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1198,52101.7,1198000,-2.571,-2.16,-3.18,257.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-10-25_07-00-19
  done: false
  episode_len_mean: 257.74
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5773999999999893
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 3
  episodes_total: 4415
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.2024185207155016
          entropy_coeff: 0.009999999999999998
          kl: 0.016627882035272438
          policy_loss: -0.10057037580344412
          total_loss: -0.09848769307136536
          vf_explained_var: 0.1902957260608673
          vf_loss: 0.011955793388187886
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_trained: 11990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1199,52142.3,1199000,-2.5774,-2.16,-3.18,257.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-10-25_07-00-56
  done: false
  episode_len_mean: 259.84
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5983999999999883
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 4419
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1293652527575411
          cur_lr: 5.000000000000001e-05
          entropy: 1.3450103243192038
          entropy_coeff: 0.009999999999999998
          kl: 0.02449528040754865
          policy_loss: -0.0689274991552035
          total_loss: -0.06737856997383965
          vf_explained_var: 0.48711201548576355
          vf_loss: 0.011830187614800201
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_trained: 12000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1200,52179.2,1200000,-2.5984,-2.16,-3.95,259.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-10-25_07-01-37
  done: false
  episode_len_mean: 261.41
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.6140999999999877
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 4422
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1940478791363117
          cur_lr: 5.000000000000001e-05
          entropy: 1.404157817363739
          entropy_coeff: 0.009999999999999998
          kl: 0.02365100986541135
          policy_loss: 0.021635402656263774
          total_loss: 0.02003227670987447
          vf_explained_var: 0.4807477593421936
          vf_loss: 0.007849024683754478
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_trained: 1201000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1201,52220.3,1201000,-2.6141,-2.21,-3.95,261.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-10-25_07-02-09
  done: false
  episode_len_mean: 263.83
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.638299999999987
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 4425
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 1.7566761904292636
          entropy_coeff: 0.009999999999999998
          kl: 0.02434966437637548
          policy_loss: -0.027214675148328146
          total_loss: -0.03029017580880059
          vf_explained_var: 0.45937439799308777
          vf_loss: 0.0074037631507962946
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_trained: 120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1202,52252.4,1202000,-2.6383,-2.21,-3.95,263.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-10-25_07-02-42
  done: false
  episode_len_mean: 267.49
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.6748999999999863
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 4428
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.8360420690642463
          entropy_coeff: 0.009999999999999998
          kl: 0.018109752078141424
          policy_loss: 0.0035968694421980116
          total_loss: 0.0018854699201054043
          vf_explained_var: 0.10280980169773102
          vf_loss: 0.008742164961424553
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1203,52285.1,1203000,-2.6749,-2.21,-3.95,267.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-10-25_07-03-13
  done: false
  episode_len_mean: 270.65
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.7064999999999872
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 4431
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.8435263249609206
          entropy_coeff: 0.009999999999999998
          kl: 0.010457677299441945
          policy_loss: 0.014056572483645545
          total_loss: 0.00889987473686536
          vf_explained_var: 0.14455854892730713
          vf_loss: 0.008712659530445106
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trained: 1204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1204,52316.5,1204000,-2.7065,-2.21,-3.95,270.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-10-25_07-03-44
  done: false
  episode_len_mean: 273.57
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.735699999999986
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 2
  episodes_total: 4433
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 2.0696738931867813
          entropy_coeff: 0.009999999999999998
          kl: 0.012865002323613754
          policy_loss: -0.008725697298844655
          total_loss: -0.019463016755051084
          vf_explained_var: 0.2583857476711273
          vf_loss: 0.004342461859535737
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_trained: 1205

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1205,52347.6,1205000,-2.7357,-2.21,-3.99,273.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-10-25_07-04-14
  done: false
  episode_len_mean: 278.08
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.7807999999999837
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 4436
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.871994185447693
          entropy_coeff: 0.009999999999999998
          kl: 0.009802861213044897
          policy_loss: 0.03437045357293553
          total_loss: 0.02825838178396225
          vf_explained_var: -0.04666182026267052
          vf_loss: 0.008327860699177512
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_trained: 120600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1206,52376.8,1206000,-2.7808,-2.21,-4.07,278.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-10-25_07-04-44
  done: false
  episode_len_mean: 281.31
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.813099999999983
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 2
  episodes_total: 4438
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.9232531203163994
          entropy_coeff: 0.009999999999999998
          kl: 0.013220591475615133
          policy_loss: -0.06202350109815598
          total_loss: -0.06531869537300533
          vf_explained_var: -0.09278798848390579
          vf_loss: 0.010165125123846035
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trained: 1207

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1207,52407.4,1207000,-2.8131,-2.21,-4.07,281.31




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-10-25_07-05-32
  done: false
  episode_len_mean: 284.84
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8483999999999834
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4441
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.9255396339628432
          entropy_coeff: 0.009999999999999998
          kl: 0.011718197211149336
          policy_loss: 0.02392267675863372
          total_loss: 0.021207295109828314
          vf_explained_var: -0.2944590151309967
          vf_loss: 0.011423759523313492
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_trained: 12080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1208,52455,1208000,-2.8484,-2.21,-4.27,284.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-10-25_07-06-01
  done: false
  episode_len_mean: 287.73
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.877299999999983
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 2
  episodes_total: 4443
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.961952539285024
          entropy_coeff: 0.009999999999999998
          kl: 0.014064724385052171
          policy_loss: -0.1068604040477011
          total_loss: -0.11335329363743464
          vf_explained_var: 0.015453772619366646
          vf_loss: 0.0069858645619307125
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_trained: 12090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1209,52483.7,1209000,-2.8773,-2.21,-4.27,287.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-10-25_07-06-33
  done: false
  episode_len_mean: 290.0
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8999999999999826
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4446
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4366077280567012
          cur_lr: 5.000000000000001e-05
          entropy: 1.6682371788554722
          entropy_coeff: 0.009999999999999998
          kl: 0.025607808773372653
          policy_loss: -0.10250376843743854
          total_loss: -0.09490725464291043
          vf_explained_var: 0.32157102227211
          vf_loss: 0.013098319133536682
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_trained: 1210000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1210,52516.4,1210000,-2.9,-2.21,-4.27,290


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-10-25_07-07-05
  done: false
  episode_len_mean: 293.61
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.9360999999999815
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4449
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6549115920850517
          cur_lr: 5.000000000000001e-05
          entropy: 1.7920613845189413
          entropy_coeff: 0.009999999999999998
          kl: 0.010412917965570518
          policy_loss: 0.025628865427441066
          total_loss: 0.026099329359001583
          vf_explained_var: -0.10473072528839111
          vf_loss: 0.011571536438229184
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_trained: 121

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1211,52547.6,1211000,-2.9361,-2.21,-4.27,293.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-10-25_07-07-38
  done: false
  episode_len_mean: 295.93
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.9592999999999807
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4452
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6549115920850517
          cur_lr: 5.000000000000001e-05
          entropy: 1.560287602742513
          entropy_coeff: 0.009999999999999998
          kl: 0.007927943902648673
          policy_loss: 0.11246948391199112
          total_loss: 0.1107245965136422
          vf_explained_var: 0.09190401434898376
          vf_loss: 0.008665888931136578
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trained: 1212000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1212,52581.2,1212000,-2.9593,-2.21,-4.27,295.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-10-25_07-08-16
  done: false
  episode_len_mean: 297.33
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.97329999999998
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4455
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6549115920850517
          cur_lr: 5.000000000000001e-05
          entropy: 1.4019896335071989
          entropy_coeff: 0.009999999999999998
          kl: 0.010571283692094606
          policy_loss: 0.04283558999498685
          total_loss: 0.04648300616277589
          vf_explained_var: -0.30391883850097656
          vf_loss: 0.010744052591164493
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_trained: 1213000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1213,52618.9,1213000,-2.9733,-2.21,-4.27,297.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-10-25_07-08-58
  done: false
  episode_len_mean: 298.29
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.982899999999981
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4459
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6549115920850517
          cur_lr: 5.000000000000001e-05
          entropy: 1.2982325196266173
          entropy_coeff: 0.009999999999999998
          kl: 0.004144740302756884
          policy_loss: -0.01395660796099239
          total_loss: -0.01114170394010014
          vf_explained_var: 0.09654579311609268
          vf_loss: 0.013082791192250118
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_trained: 12140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1214,52660.4,1214000,-2.9829,-2.21,-4.27,298.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-10-25_07-09-34
  done: false
  episode_len_mean: 300.02
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.00019999999998
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4462
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32745579604252584
          cur_lr: 5.000000000000001e-05
          entropy: 1.3922498676511976
          entropy_coeff: 0.009999999999999998
          kl: 0.009582510983517286
          policy_loss: 0.04823753899998135
          total_loss: 0.048436038775576486
          vf_explained_var: -0.15018504858016968
          vf_loss: 0.010983149060242189
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trained: 12150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1215,52697.1,1215000,-3.0002,-2.21,-4.27,300.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-10-25_07-10-16
  done: false
  episode_len_mean: 300.71
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.00709999999998
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4465
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32745579604252584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2842032684220208
          entropy_coeff: 0.009999999999999998
          kl: 0.011077311947802097
          policy_loss: -0.10828975091377895
          total_loss: -0.10833523157570098
          vf_explained_var: 0.07263855636119843
          vf_loss: 0.00916922004479501
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_trained: 121600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1216,52738.9,1216000,-3.0071,-2.21,-4.27,300.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-10-25_07-10-58
  done: false
  episode_len_mean: 301.71
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.0170999999999792
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4469
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32745579604252584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2613500118255616
          entropy_coeff: 0.009999999999999998
          kl: 0.009403055141353494
          policy_loss: -0.006169810642798742
          total_loss: -0.0011984774635897743
          vf_explained_var: 0.02727861888706684
          vf_loss: 0.014505744052843914
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1217,52780.7,1217000,-3.0171,-2.21,-4.27,301.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-10-25_07-11-57
  done: false
  episode_len_mean: 301.87
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.0186999999999795
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4473
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32745579604252584
          cur_lr: 5.000000000000001e-05
          entropy: 1.233762424521976
          entropy_coeff: 0.009999999999999998
          kl: 0.005601349238601788
          policy_loss: 0.01775463488366869
          total_loss: 0.02095505032274458
          vf_explained_var: 0.03848863020539284
          vf_loss: 0.013703847376422749
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_trained: 121800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1218,52839.5,1218000,-3.0187,-2.21,-4.27,301.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-10-25_07-12-37
  done: false
  episode_len_mean: 302.45
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.024499999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4476
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32745579604252584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2113565100563897
          entropy_coeff: 0.009999999999999998
          kl: 0.0044854417328262074
          policy_loss: -0.10716275746623675
          total_loss: -0.10450837463140487
          vf_explained_var: 0.08693058788776398
          vf_loss: 0.013299161330279377
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained: 121

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1219,52879.6,1219000,-3.0245,-2.21,-4.27,302.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-10-25_07-13-18
  done: false
  episode_len_mean: 303.74
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.037399999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4480
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1464490612347922
          entropy_coeff: 0.009999999999999998
          kl: 0.007761906839027767
          policy_loss: 0.0300829925470882
          total_loss: 0.03250599188937081
          vf_explained_var: 0.03087959811091423
          vf_loss: 0.012616646600266297
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trained: 122000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1220,52920.8,1220000,-3.0374,-2.33,-4.27,303.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-10-25_07-13-58
  done: false
  episode_len_mean: 304.7
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.046999999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4484
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.167348975605435
          entropy_coeff: 0.009999999999999998
          kl: 0.010448157218141605
          policy_loss: 0.02004397147231632
          total_loss: 0.023596782899565166
          vf_explained_var: 0.10320912301540375
          vf_loss: 0.013515645182794994
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trained: 122100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1221,52960.8,1221000,-3.047,-2.33,-4.27,304.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-10-25_07-14-40
  done: false
  episode_len_mean: 305.17
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.0516999999999785
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4487
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1219791385862563
          entropy_coeff: 0.009999999999999998
          kl: 0.006205789597646019
          policy_loss: -0.044275558325979444
          total_loss: -0.0427032815085517
          vf_explained_var: 0.03616766259074211
          vf_loss: 0.011776010216110282
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1222,53002.8,1222000,-3.0517,-2.33,-4.27,305.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-10-25_07-15-23
  done: false
  episode_len_mean: 305.14
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.051399999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4491
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.087063447634379
          entropy_coeff: 0.009999999999999998
          kl: 0.008322120767005793
          policy_loss: -0.04793137146366967
          total_loss: -0.043652720666593973
          vf_explained_var: 0.053019508719444275
          vf_loss: 0.013786720039529934
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1223,53046,1223000,-3.0514,-2.33,-4.27,305.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-10-25_07-16-08
  done: false
  episode_len_mean: 304.81
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.0480999999999785
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4495
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1474910524156359
          entropy_coeff: 0.009999999999999998
          kl: 0.006662505235974929
          policy_loss: -0.002364087187581592
          total_loss: 0.0014045770797464583
          vf_explained_var: 0.043698616325855255
          vf_loss: 0.014152738141516845
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1224,53090.3,1224000,-3.0481,-2.33,-4.27,304.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-10-25_07-16-51
  done: false
  episode_len_mean: 304.49
  episode_media: {}
  episode_reward_max: -2.3299999999999943
  episode_reward_mean: -3.044899999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4499
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1647104647424487
          entropy_coeff: 0.009999999999999998
          kl: 0.009062625514159981
          policy_loss: 0.007776515268617206
          total_loss: 0.011682844824261136
          vf_explained_var: 0.037331920117139816
          vf_loss: 0.014069630909297201
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1225,53134.1,1225000,-3.0449,-2.33,-4.27,304.49




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-10-25_07-17-52
  done: false
  episode_len_mean: 303.91
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0390999999999786
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4503
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.168889167573717
          entropy_coeff: 0.009999999999999998
          kl: 0.006854068932140301
          policy_loss: 0.042804238034619226
          total_loss: 0.0428591638803482
          vf_explained_var: 0.01485393475741148
          vf_loss: 0.010621611678248478
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_trained: 12260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1226,53195,1226000,-3.0391,-2.22,-4.27,303.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-10-25_07-18-33
  done: false
  episode_len_mean: 303.28
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.032799999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4507
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2350527935557896
          entropy_coeff: 0.009999999999999998
          kl: 0.009026405692011174
          policy_loss: 0.03075751405623224
          total_loss: 0.03310387544333935
          vf_explained_var: 0.03416399285197258
          vf_loss: 0.013219016221248442
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_trained: 12270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1227,53235.1,1227000,-3.0328,-2.22,-4.27,303.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-10-25_07-19-16
  done: false
  episode_len_mean: 303.31
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.033099999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4511
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.280527040693495
          entropy_coeff: 0.009999999999999998
          kl: 0.011092494004914025
          policy_loss: 0.005078636937671238
          total_loss: 0.00916892257001665
          vf_explained_var: 0.03156336024403572
          vf_loss: 0.015079405996948481
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_trained: 12280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1228,53278.2,1228000,-3.0331,-2.22,-4.27,303.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-10-25_07-19-59
  done: false
  episode_len_mean: 302.85
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0284999999999784
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4514
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.330478298664093
          entropy_coeff: 0.009999999999999998
          kl: 0.009560555298950786
          policy_loss: -0.09970818294419183
          total_loss: -0.09623388912942675
          vf_explained_var: 0.02885088324546814
          vf_loss: 0.015213747912396988
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_trained: 122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1229,53321.7,1229000,-3.0285,-2.22,-4.27,302.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-10-25_07-20-42
  done: false
  episode_len_mean: 302.6
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0259999999999785
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4518
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2724921027819316
          entropy_coeff: 0.009999999999999998
          kl: 0.010261314192206723
          policy_loss: -0.0007993525928921169
          total_loss: 0.003069120148817698
          vf_explained_var: 0.04296603798866272
          vf_loss: 0.01491332868528035
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1230,53364.5,1230000,-3.026,-2.22,-4.27,302.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-10-25_07-21-23
  done: false
  episode_len_mean: 300.74
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.007399999999979
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4522
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2445197237862482
          entropy_coeff: 0.009999999999999998
          kl: 0.006505978981856981
          policy_loss: 0.012914704779783884
          total_loss: 0.016587860220008428
          vf_explained_var: 0.03888962045311928
          vf_loss: 0.015053139771852228
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_trained: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1231,53405.9,1231000,-3.0074,-2.22,-4.27,300.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-10-25_07-22-06
  done: false
  episode_len_mean: 297.79
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9778999999999805
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4526
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2200003862380981
          entropy_coeff: 0.009999999999999998
          kl: 0.005061795892443301
          policy_loss: 0.02886421564552519
          total_loss: 0.03139229110545582
          vf_explained_var: 0.03515124320983887
          vf_loss: 0.013899321740286217
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_trained: 1232

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1232,53448.7,1232000,-2.9779,-2.22,-4.27,297.79




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-10-25_07-23-07
  done: false
  episode_len_mean: 293.13
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9312999999999816
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4530
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2277512510617574
          entropy_coeff: 0.009999999999999998
          kl: 0.005271280882111206
          policy_loss: 0.018998349789116117
          total_loss: 0.022207202182875738
          vf_explained_var: 0.018635690212249756
          vf_loss: 0.01462330637085769
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1233,53509.5,1233000,-2.9313,-2.22,-4.27,293.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-10-25_07-23-54
  done: false
  episode_len_mean: 288.42
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8841999999999826
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4534
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2702982346216838
          entropy_coeff: 0.009999999999999998
          kl: 0.01283858431377939
          policy_loss: 0.0015947505831718444
          total_loss: 0.0058865189552307125
          vf_explained_var: 0.027574904263019562
          vf_loss: 0.01489271701623996
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1234,53555.9,1234000,-2.8842,-2.22,-4.27,288.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-10-25_07-24-31
  done: false
  episode_len_mean: 284.33
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.843299999999983
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4537
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.274930981794993
          entropy_coeff: 0.009999999999999998
          kl: 0.008312366741849066
          policy_loss: 0.01536166916290919
          total_loss: 0.014499812655978732
          vf_explained_var: 0.05108005553483963
          vf_loss: 0.010526485102147692
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_trained: 12350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1235,53593.6,1235000,-2.8433,-2.22,-4.27,284.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-10-25_07-25-15
  done: false
  episode_len_mean: 279.28
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.792799999999984
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 4541
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2896628379821777
          entropy_coeff: 0.009999999999999998
          kl: 0.008806007583484328
          policy_loss: 0.0024307601153850554
          total_loss: 0.006453281972143385
          vf_explained_var: 0.02688000537455082
          vf_loss: 0.015477363102965885
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1236,53637.2,1236000,-2.7928,-2.22,-4.16,279.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-10-25_07-25-58
  done: false
  episode_len_mean: 274.49
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7448999999999852
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 4545
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2776064766777886
          entropy_coeff: 0.009999999999999998
          kl: 0.01442927587969949
          policy_loss: 0.011876082751486036
          total_loss: 0.016803686072429023
          vf_explained_var: 0.03048723004758358
          vf_loss: 0.015341195412394074
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1237,53680.6,1237000,-2.7449,-2.22,-3.99,274.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-10-25_07-26-41
  done: false
  episode_len_mean: 270.26
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.702599999999986
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 4549
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.2855002416504755
          entropy_coeff: 0.009999999999999998
          kl: 0.006424929878152834
          policy_loss: 0.013310238553418053
          total_loss: 0.0166506204340193
          vf_explained_var: 0.04648962989449501
          vf_loss: 0.015143440353373687
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_trained: 12380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1238,53722.9,1238000,-2.7026,-2.22,-3.9,270.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-10-25_07-27-25
  done: false
  episode_len_mean: 267.38
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.673799999999987
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 4553
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.245454035864936
          entropy_coeff: 0.009999999999999998
          kl: 0.011760931237057548
          policy_loss: 0.010179286822676659
          total_loss: 0.014667604449722501
          vf_explained_var: 0.03191053122282028
          vf_loss: 0.015017265402194526
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_trained: 12390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1239,53766.9,1239000,-2.6738,-2.22,-3.47,267.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-10-25_07-28-09
  done: false
  episode_len_mean: 265.73
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.657299999999987
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 4556
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1844294349352518
          entropy_coeff: 0.009999999999999998
          kl: 0.01016736689491419
          policy_loss: -0.10798748847511079
          total_loss: -0.10285585646828016
          vf_explained_var: 0.05026211589574814
          vf_loss: 0.01531124096363783
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_trained: 12400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1240,53811.1,1240000,-2.6573,-2.22,-3.37,265.73




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-10-25_07-29-12
  done: false
  episode_len_mean: 263.8
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6379999999999875
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 5
  episodes_total: 4561
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1737787220213147
          entropy_coeff: 0.009999999999999998
          kl: 0.007172135894317459
          policy_loss: -0.009478768540753258
          total_loss: -0.003697421484523349
          vf_explained_var: 0.07607460767030716
          vf_loss: 0.016344852269523674
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1241,53874.1,1241000,-2.638,-2.15,-3.37,263.8


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-10-25_07-29-58
  done: false
  episode_len_mean: 262.19
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.6218999999999877
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 4565
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 1.1379122548633152
          entropy_coeff: 0.009999999999999998
          kl: 0.004275965238580978
          policy_loss: 0.01977663760383924
          total_loss: 0.023985251370403502
          vf_explained_var: 0.07640951126813889
          vf_loss: 0.01488764135994845
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained: 12420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1242,53920.3,1242000,-2.6219,-2.15,-2.91,262.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-10-25_07-30-38
  done: false
  episode_len_mean: 260.86
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.608599999999988
  episode_reward_min: -2.8799999999999826
  episodes_this_iter: 4
  episodes_total: 4569
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.1954575525389777
          entropy_coeff: 0.009999999999999998
          kl: 0.016420152840885688
          policy_loss: 0.029426183882686828
          total_loss: 0.03133153758115238
          vf_explained_var: 0.10567619651556015
          vf_loss: 0.01251571107034882
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 12430

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1243,53960.3,1243000,-2.6086,-2.15,-2.88,260.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-10-25_07-31-24
  done: false
  episode_len_mean: 260.32
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.603199999999988
  episode_reward_min: -2.8799999999999826
  episodes_this_iter: 4
  episodes_total: 4573
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.225123445192973
          entropy_coeff: 0.009999999999999998
          kl: 0.009400158758055315
          policy_loss: 0.012081257088316812
          total_loss: 0.015373592409822676
          vf_explained_var: 0.050995610654354095
          vf_loss: 0.014774032764964633
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1244,54006.4,1244000,-2.6032,-2.15,-2.88,260.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-10-25_07-32-10
  done: false
  episode_len_mean: 259.28
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.5927999999999884
  episode_reward_min: -2.8799999999999826
  episodes_this_iter: 4
  episodes_total: 4577
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2241251044803196
          entropy_coeff: 0.009999999999999998
          kl: 0.00813339129603542
          policy_loss: 0.01867017149925232
          total_loss: 0.021792010300689273
          vf_explained_var: 0.07214321196079254
          vf_loss: 0.014697260637250211
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_trained: 1245

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1245,54051.8,1245000,-2.5928,-2.15,-2.88,259.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-10-25_07-32-56
  done: false
  episode_len_mean: 257.76
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.5775999999999892
  episode_reward_min: -2.8799999999999826
  episodes_this_iter: 4
  episodes_total: 4581
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.211210960812039
          entropy_coeff: 0.009999999999999998
          kl: 0.0064827887582086735
          policy_loss: 0.025086262159877352
          total_loss: 0.02777980433570014
          vf_explained_var: 0.04304874315857887
          vf_loss: 0.014274945089386568
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_trained: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1246,54097.9,1246000,-2.5776,-2.15,-2.88,257.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-10-25_07-33-40
  done: false
  episode_len_mean: 256.67
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.5666999999999893
  episode_reward_min: -2.819999999999984
  episodes_this_iter: 4
  episodes_total: 4585
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2352522916264004
          entropy_coeff: 0.009999999999999998
          kl: 0.009480250249675137
          policy_loss: 0.03706268982754813
          total_loss: 0.037204925219217935
          vf_explained_var: 0.018507830798625946
          vf_loss: 0.011718668726583321
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1247,54142.1,1247000,-2.5667,-2.15,-2.82,256.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-10-25_07-34-25
  done: false
  episode_len_mean: 256.0
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.559999999999989
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 4589
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.1835050900777182
          entropy_coeff: 0.009999999999999998
          kl: 0.014463965310543555
          policy_loss: 0.039792543401320775
          total_loss: 0.04079676651292377
          vf_explained_var: 0.024016698822379112
          vf_loss: 0.011655198275629016
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 12480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1248,54187.1,1248000,-2.56,-2.15,-2.81,256




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-10-25_07-35-31
  done: false
  episode_len_mean: 255.2
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5519999999999894
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 4593
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2205150233374702
          entropy_coeff: 0.009999999999999998
          kl: 0.01140173787129552
          policy_loss: -0.002559378743171692
          total_loss: 0.0019232048756546445
          vf_explained_var: 0.04566382244229317
          vf_loss: 0.015754341447932852
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1249,54252.8,1249000,-2.552,-2.12,-2.81,255.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-10-25_07-36-18
  done: false
  episode_len_mean: 254.91
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.54909999999999
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 4597
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.186074956258138
          entropy_coeff: 0.009999999999999998
          kl: 0.0073774277100974095
          policy_loss: -0.0070435039699077604
          total_loss: -0.002861966234114435
          vf_explained_var: 0.06778863817453384
          vf_loss: 0.015438343232704533
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1250,54299.6,1250000,-2.5491,-2.12,-2.81,254.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-10-25_07-36-58
  done: false
  episode_len_mean: 255.44
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5543999999999896
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4601
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.441129249996609
          entropy_coeff: 0.009999999999999998
          kl: 0.01896874517639557
          policy_loss: 0.0026965882629156114
          total_loss: 0.005222839696539773
          vf_explained_var: 0.13221533596515656
          vf_loss: 0.015384688238716788
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1251,54339.5,1251000,-2.5544,-2.12,-3.27,255.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-10-25_07-37-43
  done: false
  episode_len_mean: 254.64
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5463999999999896
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4605
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.392805818716685
          entropy_coeff: 0.009999999999999998
          kl: 0.013077969508697457
          policy_loss: 0.03686760912338893
          total_loss: 0.03773785341117117
          vf_explained_var: 0.1881573349237442
          vf_loss: 0.013727687682128615
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 12520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1252,54384.9,1252000,-2.5464,-2.12,-3.27,254.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-10-25_07-38-28
  done: false
  episode_len_mean: 254.47
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.544699999999989
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4609
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.3844724403487312
          entropy_coeff: 0.009999999999999998
          kl: 0.015605783090817284
          policy_loss: 0.007548200007941988
          total_loss: 0.009419530712895923
          vf_explained_var: 0.20878741145133972
          vf_loss: 0.014438504880915085
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1253,54430.1,1253000,-2.5447,-2.12,-3.27,254.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-10-25_07-39-15
  done: false
  episode_len_mean: 253.81
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5380999999999903
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4613
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.451934924390581
          entropy_coeff: 0.009999999999999998
          kl: 0.014760150689292572
          policy_loss: 0.016749928146600722
          total_loss: 0.017605336672729916
          vf_explained_var: 0.24619919061660767
          vf_loss: 0.014166432256913847
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1254,54476.4,1254000,-2.5381,-2.12,-3.27,253.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-10-25_07-39-58
  done: false
  episode_len_mean: 253.98
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.53979999999999
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4617
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.5003063281377156
          entropy_coeff: 0.009999999999999998
          kl: 0.014035281144712917
          policy_loss: -0.03833746574819088
          total_loss: -0.03855704085694419
          vf_explained_var: 0.31026947498321533
          vf_loss: 0.013634506147354841
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1255,54520.1,1255000,-2.5398,-2.12,-3.27,253.98




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-10-25_07-41-01
  done: false
  episode_len_mean: 253.43
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.53429999999999
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4621
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.4321156289842394
          entropy_coeff: 0.009999999999999998
          kl: 0.008414302477873292
          policy_loss: -0.000600607693195343
          total_loss: -0.0007599989987081951
          vf_explained_var: 0.3295480012893677
          vf_loss: 0.01347293390168084
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1256,54582.3,1256000,-2.5343,-2.12,-3.27,253.43


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-10-25_07-41-42
  done: false
  episode_len_mean: 254.26
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5425999999999895
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 3
  episodes_total: 4624
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.5377004981040954
          entropy_coeff: 0.009999999999999998
          kl: 0.017770219816319214
          policy_loss: 0.056659820427497225
          total_loss: 0.052856885227892135
          vf_explained_var: 0.4223393201828003
          vf_loss: 0.010119329253211618
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1257,54623.2,1257000,-2.5426,-2.12,-3.56,254.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-10-25_07-42-26
  done: false
  episode_len_mean: 254.1
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5409999999999893
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4628
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.420412117905087
          entropy_coeff: 0.009999999999999998
          kl: 0.009937117633030113
          policy_loss: 0.020130385872390535
          total_loss: 0.02027378421690729
          vf_explained_var: 0.25570186972618103
          vf_loss: 0.013534025682343378
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained: 12580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1258,54667.4,1258000,-2.541,-2.12,-3.56,254.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-10-25_07-43-07
  done: false
  episode_len_mean: 254.54
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5453999999999892
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4632
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.454416525363922
          entropy_coeff: 0.009999999999999998
          kl: 0.01344372303478107
          policy_loss: 0.005013319270478355
          total_loss: 0.005967989936470986
          vf_explained_var: 0.22426292300224304
          vf_loss: 0.014398276247084141
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained: 1259

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1259,54709,1259000,-2.5454,-2.12,-3.56,254.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-10-25_07-43-50
  done: false
  episode_len_mean: 254.41
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5440999999999896
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4636
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.3782635847727458
          entropy_coeff: 0.009999999999999998
          kl: 0.00815786259726947
          policy_loss: 0.003367236877481143
          total_loss: 0.004301896732714441
          vf_explained_var: 0.21868222951889038
          vf_loss: 0.01404946307755179
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained: 1260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1260,54752,1260000,-2.5441,-2.12,-3.56,254.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-10-25_07-44-34
  done: false
  episode_len_mean: 254.49
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5448999999999895
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 3
  episodes_total: 4639
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.3144915699958801
          entropy_coeff: 0.009999999999999998
          kl: 0.013708703875517772
          policy_loss: -0.0690639285577668
          total_loss: -0.06931481113036474
          vf_explained_var: 0.2716502249240875
          vf_loss: 0.011771781054428882
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_trained: 1261

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1261,54795.6,1261000,-2.5449,-2.12,-3.56,254.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-10-25_07-45-21
  done: false
  episode_len_mean: 253.79
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.53789999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4643
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2694992237620883
          entropy_coeff: 0.009999999999999998
          kl: 0.009831561689913334
          policy_loss: -0.125968876770801
          total_loss: -0.1206559261928002
          vf_explained_var: 0.2388518899679184
          vf_loss: 0.017203093144214816
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained: 1262000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1262,54842.6,1262000,-2.5379,-2.12,-3.56,253.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-10-25_07-46-06
  done: false
  episode_len_mean: 253.33
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5332999999999894
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4647
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2584219482209948
          entropy_coeff: 0.009999999999999998
          kl: 0.009026017133218841
          policy_loss: -0.11729405547181765
          total_loss: -0.11460700842241446
          vf_explained_var: 0.29725944995880127
          vf_loss: 0.014532360268963708
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1263,54887.7,1263000,-2.5333,-2.12,-3.56,253.33




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-10-25_07-47-11
  done: false
  episode_len_mean: 252.33
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.52329999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 5
  episodes_total: 4652
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.1215226809183756
          entropy_coeff: 0.009999999999999998
          kl: 0.009874332887661316
          policy_loss: -0.008780168741941452
          total_loss: -0.003658158911599053
          vf_explained_var: 0.20594851672649384
          vf_loss: 0.015528882915774981
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1264,54952.8,1264000,-2.5233,-2.12,-3.56,252.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-10-25_07-47-58
  done: false
  episode_len_mean: 251.78
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.51779999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4656
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.0387644648551941
          entropy_coeff: 0.009999999999999998
          kl: 0.008540837186346116
          policy_loss: 0.006643359859784444
          total_loss: 0.009845274355676439
          vf_explained_var: 0.13648773729801178
          vf_loss: 0.012890370935201645
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 1265

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1265,54999.3,1265000,-2.5178,-2.12,-3.56,251.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-10-25_07-48-42
  done: false
  episode_len_mean: 251.25
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.51249999999999
  episode_reward_min: -3.559999999999968
  episodes_this_iter: 4
  episodes_total: 4660
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.021735543674893
          entropy_coeff: 0.009999999999999998
          kl: 0.007383433348812534
          policy_loss: 0.02892812622918023
          total_loss: 0.032460004008478587
          vf_explained_var: 0.1012115404009819
          vf_loss: 0.013144794685973061
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_trained: 1266000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1266,55043.3,1266000,-2.5125,-2.12,-3.56,251.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-10-25_07-49-26
  done: false
  episode_len_mean: 252.53
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.52529999999999
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4664
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08186394901063146
          cur_lr: 5.000000000000001e-05
          entropy: 1.2265741321775647
          entropy_coeff: 0.009999999999999998
          kl: 0.022642773682864163
          policy_loss: 0.009810600264204872
          total_loss: 0.01853480628795094
          vf_explained_var: -0.020434292033314705
          vf_loss: 0.019136320693521863
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_trained: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1267,55086.9,1267000,-2.5253,-2.12,-4.27,252.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-10-25_07-50-14
  done: false
  episode_len_mean: 251.94
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5193999999999894
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4668
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12279592351594722
          cur_lr: 5.000000000000001e-05
          entropy: 1.048729595873091
          entropy_coeff: 0.009999999999999998
          kl: 0.009937596857673824
          policy_loss: 0.019466672589381537
          total_loss: 0.0242314166492886
          vf_explained_var: 0.02155217155814171
          vf_loss: 0.014031737815174791
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 12680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1268,55135,1268000,-2.5194,-2.12,-4.27,251.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-10-25_07-51-02
  done: false
  episode_len_mean: 251.42
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.51419999999999
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4672
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12279592351594722
          cur_lr: 5.000000000000001e-05
          entropy: 1.0821611470646328
          entropy_coeff: 0.009999999999999998
          kl: 0.0065944832571883044
          policy_loss: 0.0006612800889545017
          total_loss: 0.004662638819879956
          vf_explained_var: 0.05401596054434776
          vf_loss: 0.014013194758445025
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1269,55183.7,1269000,-2.5142,-2.12,-4.27,251.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-10-25_07-51-52
  done: false
  episode_len_mean: 250.57
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5056999999999903
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 5
  episodes_total: 4677
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12279592351594722
          cur_lr: 5.000000000000001e-05
          entropy: 1.0472578008969624
          entropy_coeff: 0.009999999999999998
          kl: 0.02008056203995636
          policy_loss: -0.03342201192345884
          total_loss: -0.02397736513780223
          vf_explained_var: 0.0831621065735817
          vf_loss: 0.017451413813978432
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained: 1270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1270,55232.9,1270000,-2.5057,-2.12,-4.27,250.57




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-10-25_07-52-56
  done: false
  episode_len_mean: 250.18
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.50179999999999
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 4681
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.161407134268019
          entropy_coeff: 0.009999999999999998
          kl: 0.007873791431734705
          policy_loss: 0.014207588053411907
          total_loss: 0.017123854243093067
          vf_explained_var: 0.19268639385700226
          vf_loss: 0.01308003088666333
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trained: 127100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1271,55297.2,1271000,-2.5018,-2.12,-4.27,250.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-10-25_07-53-37
  done: false
  episode_len_mean: 249.68
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4967999999999906
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 4684
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.2191129724184673
          entropy_coeff: 0.009999999999999998
          kl: 0.012850693729822638
          policy_loss: -0.14098706733849314
          total_loss: -0.13790103668967882
          vf_explained_var: 0.2141445428133011
          vf_loss: 0.01291014114394784
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_trained: 1272

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1272,55338.3,1272000,-2.4968,-2.12,-4.27,249.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-10-25_07-54-11
  done: false
  episode_len_mean: 253.15
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.53149999999999
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 4687
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.4776905126041837
          entropy_coeff: 0.009999999999999998
          kl: 0.014560818857018789
          policy_loss: -0.12611928979555767
          total_loss: -0.1247764321664969
          vf_explained_var: 0.18624185025691986
          vf_loss: 0.013437750066320102
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps_trained: 127300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1273,55372.1,1273000,-2.5315,-2.12,-4.42,253.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-10-25_07-54-47
  done: false
  episode_len_mean: 254.98
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5497999999999896
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 4690
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.2793406678570642
          entropy_coeff: 0.009999999999999998
          kl: 0.015809520094973686
          policy_loss: -0.12093911833233303
          total_loss: -0.11760653133193652
          vf_explained_var: 0.17988723516464233
          vf_loss: 0.013213976348439852
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1274,55408.4,1274000,-2.5498,-2.14,-4.42,254.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-10-25_07-55-18
  done: false
  episode_len_mean: 260.42
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.604199999999989
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 4693
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.6922131405936347
          entropy_coeff: 0.009999999999999998
          kl: 0.012363751190664926
          policy_loss: 0.03752797941366832
          total_loss: 0.03401515781879425
          vf_explained_var: 0.2611871361732483
          vf_loss: 0.011131984986261362
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_trained: 1275000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1275,55439.1,1275000,-2.6042,-2.14,-4.42,260.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-10-25_07-55-48
  done: false
  episode_len_mean: 262.38
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.623799999999988
  episode_reward_min: -4.569999999999947
  episodes_this_iter: 2
  episodes_total: 4695
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.6698240915934244
          entropy_coeff: 0.009999999999999998
          kl: 0.015603313197549875
          policy_loss: -0.10394489434030321
          total_loss: -0.10761332147651248
          vf_explained_var: 0.08367642015218735
          vf_loss: 0.010155781747501653
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1276,55469.4,1276000,-2.6238,-2.14,-4.57,262.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-10-25_07-56-25
  done: false
  episode_len_mean: 265.33
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.653299999999988
  episode_reward_min: -4.599999999999946
  episodes_this_iter: 4
  episodes_total: 4699
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.4969941470358106
          entropy_coeff: 0.009999999999999998
          kl: 0.013109664000834522
          policy_loss: -0.02293874513771799
          total_loss: -0.019524660044246248
          vf_explained_var: 0.06739534437656403
          vf_loss: 0.015969306427157586
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1277,55505.8,1277000,-2.6533,-2.14,-4.6,265.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-10-25_07-56-57
  done: false
  episode_len_mean: 267.07
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6706999999999868
  episode_reward_min: -4.599999999999946
  episodes_this_iter: 2
  episodes_total: 4701
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.5077449136310153
          entropy_coeff: 0.009999999999999998
          kl: 0.01824671357258089
          policy_loss: -0.09557209114233653
          total_loss: -0.09735303769508998
          vf_explained_var: -0.1994054615497589
          vf_loss: 0.009935565564632674
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1278,55538.2,1278000,-2.6707,-2.14,-4.6,267.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-10-25_07-57-31
  done: false
  episode_len_mean: 272.16
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7215999999999863
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4704
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.5210505710707771
          entropy_coeff: 0.009999999999999998
          kl: 0.01756547837738377
          policy_loss: -0.0050014927983284
          total_loss: -0.00628666447268592
          vf_explained_var: -0.2547774910926819
          vf_loss: 0.010689881443977357
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained: 1279

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1279,55572.2,1279000,-2.7216,-2.14,-5.42,272.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-10-25_07-58-19
  done: false
  episode_len_mean: 271.53
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7152999999999863
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4708
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.9983964701493581
          entropy_coeff: 0.009999999999999998
          kl: 0.0060344097586929974
          policy_loss: -0.026190304756164552
          total_loss: -0.02027985437048806
          vf_explained_var: 0.07568804919719696
          vf_loss: 0.014782912366920047
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1280,55619.6,1280000,-2.7153,-2.14,-5.42,271.53




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-10-25_07-59-08
  done: false
  episode_len_mean: 273.56
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7355999999999856
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4711
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.517660477426317
          entropy_coeff: 0.009999999999999998
          kl: 0.013438471130377631
          policy_loss: -0.10182987401882808
          total_loss: -0.10072682988312509
          vf_explained_var: 0.2940792739391327
          vf_loss: 0.013804364007794195
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained: 1281

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1281,55669,1281000,-2.7356,-2.14,-5.42,273.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-10-25_07-59-45
  done: false
  episode_len_mean: 275.46
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.754599999999985
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4714
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.3481858068042332
          entropy_coeff: 0.009999999999999998
          kl: 0.010714834858040747
          policy_loss: -0.09849653583433893
          total_loss: -0.09739709405435457
          vf_explained_var: 0.25027820467948914
          vf_loss: 0.012607692502852944
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trained: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1282,55706.1,1282000,-2.7546,-2.14,-5.42,275.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-10-25_08-00-18
  done: false
  episode_len_mean: 278.82
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.788199999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4717
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.466218305958642
          entropy_coeff: 0.009999999999999998
          kl: 0.014550245034455303
          policy_loss: 0.041139673027727336
          total_loss: 0.03914712882704205
          vf_explained_var: 0.29796484112739563
          vf_loss: 0.009989573437875758
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_trained: 12830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1283,55738.8,1283000,-2.7882,-2.14,-5.42,278.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-10-25_08-00-50
  done: false
  episode_len_mean: 282.27
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8226999999999838
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4720
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.4594102488623726
          entropy_coeff: 0.009999999999999998
          kl: 0.012518807175618936
          policy_loss: 0.048190059181716705
          total_loss: 0.04645169319378005
          vf_explained_var: -0.049892641603946686
          vf_loss: 0.010549847309529367
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1284,55771,1284000,-2.8227,-2.14,-5.42,282.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-10-25_08-01-35
  done: false
  episode_len_mean: 281.24
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.812399999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4724
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.121012564500173
          entropy_coeff: 0.009999999999999998
          kl: 0.010457759984759857
          policy_loss: 0.028922554271088707
          total_loss: 0.0316365550375647
          vf_explained_var: 0.17685426771640778
          vf_loss: 0.011997869062340922
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained: 128500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1285,55816.1,1285000,-2.8124,-2.14,-5.42,281.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-10-25_08-02-17
  done: false
  episode_len_mean: 280.95
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.809499999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 4727
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 1.1383410970369974
          entropy_coeff: 0.009999999999999998
          kl: 0.004499348461279832
          policy_loss: -0.07418479836649365
          total_loss: -0.07289279831780328
          vf_explained_var: 0.1462625414133072
          vf_loss: 0.011846655576179425
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 1286

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1286,55857.8,1286000,-2.8095,-2.14,-5.42,280.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-10-25_08-03-00
  done: false
  episode_len_mean: 281.68
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8167999999999846
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4731
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.2017305162217882
          entropy_coeff: 0.009999999999999998
          kl: 0.016482046628604783
          policy_loss: -0.027365478459331724
          total_loss: -0.023992113106780583
          vf_explained_var: 0.11908403038978577
          vf_loss: 0.013872724906024005
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1287,55900.9,1287000,-2.8168,-2.14,-5.42,281.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-10-25_08-03-43
  done: false
  episode_len_mean: 281.12
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.811199999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4735
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.1526155246628655
          entropy_coeff: 0.009999999999999998
          kl: 0.007863658834188433
          policy_loss: -0.019901368021965026
          total_loss: -0.015377838247352177
          vf_explained_var: 0.07929309457540512
          vf_loss: 0.015325466377867593
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1288,55943.6,1288000,-2.8112,-2.14,-5.42,281.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-10-25_08-04-25
  done: false
  episode_len_mean: 282.17
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.821699999999983
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4739
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.3883609188927544
          entropy_coeff: 0.009999999999999998
          kl: 0.012392071786808224
          policy_loss: -0.00255049682325787
          total_loss: -0.0017628939615355597
          vf_explained_var: 0.16020509600639343
          vf_loss: 0.01352993817999959
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1289,55985.4,1289000,-2.8217,-2.14,-5.42,282.17




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-10-25_08-05-30
  done: false
  episode_len_mean: 282.01
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8200999999999836
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4743
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.1680288407537671
          entropy_coeff: 0.009999999999999998
          kl: 0.00677065463355433
          policy_loss: 0.01506855520937178
          total_loss: 0.018681204981274076
          vf_explained_var: 0.11707473546266556
          vf_loss: 0.014669379188368718
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained: 1290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1290,56050.8,1290000,-2.8201,-2.14,-5.42,282.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-10-25_08-06-16
  done: false
  episode_len_mean: 281.7
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8169999999999846
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4747
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.1362217161390515
          entropy_coeff: 0.009999999999999998
          kl: 0.0066971033347857185
          policy_loss: 0.025527279120352533
          total_loss: 0.027802127309971385
          vf_explained_var: 0.23960885405540466
          vf_loss: 0.013020283439093165
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1291,56097.2,1291000,-2.817,-2.14,-5.42,281.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-10-25_08-07-01
  done: false
  episode_len_mean: 282.07
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.820699999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4751
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.0928166959020826
          entropy_coeff: 0.009999999999999998
          kl: 0.006843935215686909
          policy_loss: 0.02160152362452613
          total_loss: 0.025397494435310364
          vf_explained_var: 0.15610717236995697
          vf_loss: 0.014093830953869554
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained: 1292

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1292,56142,1292000,-2.8207,-2.14,-5.42,282.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-10-25_08-07-46
  done: false
  episode_len_mean: 282.45
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.824499999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4755
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09209694263696042
          cur_lr: 5.000000000000001e-05
          entropy: 1.1945603900485569
          entropy_coeff: 0.009999999999999998
          kl: 0.07138419925601522
          policy_loss: 0.028344434996445973
          total_loss: 0.03531980241338412
          vf_explained_var: 0.27414435148239136
          vf_loss: 0.012346705639113982
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained: 12930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1293,56186.9,1293000,-2.8245,-2.14,-5.42,282.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-10-25_08-08-33
  done: false
  episode_len_mean: 282.65
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.826499999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4759
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.090183957417806
          entropy_coeff: 0.009999999999999998
          kl: 0.010723867058536977
          policy_loss: 0.029292409867048265
          total_loss: 0.03061927548713154
          vf_explained_var: 0.26606544852256775
          vf_loss: 0.010747253749933508
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_trained: 12940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1294,56233.5,1294000,-2.8265,-2.14,-5.42,282.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-10-25_08-09-18
  done: false
  episode_len_mean: 281.12
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.811199999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4763
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1462555911805894
          entropy_coeff: 0.009999999999999998
          kl: 0.01464524782228799
          policy_loss: 0.016412240349584155
          total_loss: 0.01920437870754136
          vf_explained_var: 0.12298689782619476
          vf_loss: 0.012231522001740006
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 12950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1295,56278.8,1295000,-2.8112,-2.14,-5.42,281.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-10-25_08-10-05
  done: false
  episode_len_mean: 281.7
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8169999999999833
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4767
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1794572485817802
          entropy_coeff: 0.009999999999999998
          kl: 0.008346597079531904
          policy_loss: -0.003959225449297163
          total_loss: -0.0016443522853983772
          vf_explained_var: 0.06646678596735
          vf_loss: 0.012956398063235813
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1296,56325.4,1296000,-2.817,-2.14,-5.42,281.7




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-10-25_08-11-09
  done: false
  episode_len_mean: 282.18
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.821799999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4771
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2900410069359673
          entropy_coeff: 0.009999999999999998
          kl: 0.018574663691248834
          policy_loss: -0.049941077621446714
          total_loss: -0.04754885741406017
          vf_explained_var: 0.11562095582485199
          vf_loss: 0.012726624134100147
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1297,56389.1,1297000,-2.8218,-2.14,-5.42,282.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-10-25_08-11-51
  done: false
  episode_len_mean: 283.5
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8349999999999835
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4775
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2716531541612413
          entropy_coeff: 0.009999999999999998
          kl: 0.01514251836816306
          policy_loss: 0.004246045814620124
          total_loss: 0.0066820917858017815
          vf_explained_var: 0.10330410301685333
          vf_loss: 0.013060709109736814
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1298,56431.2,1298000,-2.835,-2.14,-5.42,283.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-10-25_08-12-34
  done: false
  episode_len_mean: 285.16
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.851599999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4779
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.3189960453245375
          entropy_coeff: 0.009999999999999998
          kl: 0.019989131227481336
          policy_loss: 0.020102171434296504
          total_loss: 0.022115836292505263
          vf_explained_var: 0.09789562970399857
          vf_loss: 0.012442219329790937
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1299,56474.1,1299000,-2.8516,-2.14,-5.42,285.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-10-25_08-13-19
  done: false
  episode_len_mean: 285.62
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8561999999999825
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4783
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1225443972481621
          entropy_coeff: 0.009999999999999998
          kl: 0.007562132035854398
          policy_loss: 0.026866020924515194
          total_loss: 0.02992749325931072
          vf_explained_var: 0.08414217084646225
          vf_loss: 0.013242239536096653
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained: 1300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1300,56519.9,1300000,-2.8562,-2.24,-5.42,285.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-10-25_08-14-04
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.826399999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4787
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.0935997035768297
          entropy_coeff: 0.009999999999999998
          kl: 0.005107522369292165
          policy_loss: 0.012854475610786014
          total_loss: 0.017056150568856135
          vf_explained_var: 0.06831817328929901
          vf_loss: 0.014432089827540849
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_trained: 1301

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1301,56564.2,1301000,-2.8264,-2.24,-5.42,282.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-10-25_08-14-50
  done: false
  episode_len_mean: 279.19
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7918999999999854
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4791
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1555678407351175
          entropy_coeff: 0.009999999999999998
          kl: 0.005626799720740472
          policy_loss: 0.015715782220164936
          total_loss: 0.01926406862007247
          vf_explained_var: 0.09455100446939468
          vf_loss: 0.01432664833135075
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained: 13020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1302,56610.3,1302000,-2.7919,-2.24,-5.42,279.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-10-25_08-15-36
  done: false
  episode_len_mean: 273.6
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7359999999999847
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4795
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1160951402452257
          entropy_coeff: 0.009999999999999998
          kl: 0.011201808429193546
          policy_loss: 0.016366892059644065
          total_loss: 0.0210874212698804
          vf_explained_var: 0.08381971716880798
          vf_loss: 0.014334002633889516
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained: 130300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1303,56656.5,1303000,-2.736,-2.24,-5.42,273.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-10-25_08-16-18
  done: false
  episode_len_mean: 270.68
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7067999999999866
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 4799
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.293941901789771
          entropy_coeff: 0.009999999999999998
          kl: 0.01058933870208397
          policy_loss: -0.002050678266419305
          total_loss: 0.0009063868059052361
          vf_explained_var: 0.10179266333580017
          vf_loss: 0.014433618003709448
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1304,56698.6,1304000,-2.7068,-2.24,-5.42,270.68




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-10-25_08-17-23
  done: false
  episode_len_mean: 263.53
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.635299999999988
  episode_reward_min: -4.769999999999943
  episodes_this_iter: 4
  episodes_total: 4803
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1774929496977065
          entropy_coeff: 0.009999999999999998
          kl: 0.008970110565993948
          policy_loss: 0.018814862188365724
          total_loss: 0.021519621875551013
          vf_explained_var: 0.12736494839191437
          vf_loss: 0.013240510649565194
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1305,56763.7,1305000,-2.6353,-2.14,-4.77,263.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-10-25_08-18-09
  done: false
  episode_len_mean: 263.83
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.638299999999988
  episode_reward_min: -4.769999999999943
  episodes_this_iter: 4
  episodes_total: 4807
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.1994345545768739
          entropy_coeff: 0.009999999999999998
          kl: 0.010125678190104598
          policy_loss: 0.02008306309580803
          total_loss: 0.02381844984160529
          vf_explained_var: 0.10336394608020782
          vf_loss: 0.014330917089763615
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 13060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1306,56808.8,1306000,-2.6383,-2.14,-4.77,263.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-10-25_08-18-54
  done: false
  episode_len_mean: 261.89
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.618899999999988
  episode_reward_min: -4.769999999999943
  episodes_this_iter: 4
  episodes_total: 4811
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.253634136252933
          entropy_coeff: 0.009999999999999998
          kl: 0.0052876330802680135
          policy_loss: 0.0139106508758333
          total_loss: 0.01616532521115409
          vf_explained_var: 0.1397411972284317
          vf_loss: 0.014060550876375701
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_trained: 1307000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1307,56854.7,1307000,-2.6189,-2.14,-4.77,261.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-10-25_08-19-38
  done: false
  episode_len_mean: 258.27
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.582699999999989
  episode_reward_min: -4.769999999999943
  episodes_this_iter: 4
  episodes_total: 4815
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2377535581588746
          entropy_coeff: 0.009999999999999998
          kl: 0.00816371331373331
          policy_loss: 0.016568631596035426
          total_loss: 0.019467338257365758
          vf_explained_var: 0.10465030372142792
          vf_loss: 0.014148464602314764
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 1308

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1308,56898.4,1308000,-2.5827,-2.14,-4.77,258.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-10-25_08-20-25
  done: false
  episode_len_mean: 254.51
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5450999999999895
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 4819
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2441234323713515
          entropy_coeff: 0.009999999999999998
          kl: 0.006561732183358041
          policy_loss: 0.03075169457329644
          total_loss: 0.03127989288833406
          vf_explained_var: 0.10992632061243057
          vf_loss: 0.01206295873141951
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 13090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1309,56945,1309000,-2.5451,-2.14,-4.2,254.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-10-25_08-21-10
  done: false
  episode_len_mean: 252.94
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5293999999999897
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 4822
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.270185523562961
          entropy_coeff: 0.009999999999999998
          kl: 0.008259345232158448
          policy_loss: -0.10236926244364844
          total_loss: -0.0997038291560279
          vf_explained_var: 0.12626025080680847
          vf_loss: 0.0142262972270449
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained: 131000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1310,56990,1310000,-2.5294,-2.14,-4.01,252.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-10-25_08-21-54
  done: false
  episode_len_mean: 253.37
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.53369999999999
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 4826
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2213659432199266
          entropy_coeff: 0.009999999999999998
          kl: 0.012461730907337515
          policy_loss: 0.003023780385653178
          total_loss: 0.004382732013861339
          vf_explained_var: 0.12786129117012024
          vf_loss: 0.01185108024833931
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained: 13110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1311,57033.7,1311000,-2.5337,-2.14,-4.01,253.37




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-10-25_08-22-53
  done: false
  episode_len_mean: 252.18
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52179999999999
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 4830
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2187962783707513
          entropy_coeff: 0.009999999999999998
          kl: 0.0077561938545094395
          policy_loss: -0.044670100510120395
          total_loss: -0.04137278729014927
          vf_explained_var: 0.13350117206573486
          vf_loss: 0.014413792546838522
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1312,57093.1,1312000,-2.5218,-2.14,-4.01,252.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-10-25_08-23-39
  done: false
  episode_len_mean: 252.69
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52689999999999
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 4834
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2927685088581509
          entropy_coeff: 0.009999999999999998
          kl: 0.01435399986579758
          policy_loss: -0.002921116848786672
          total_loss: 0.0007421347830030654
          vf_explained_var: 0.08788716793060303
          vf_loss: 0.014607997724993362
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1313,57138.8,1313000,-2.5269,-2.14,-4.01,252.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-10-25_08-24-20
  done: false
  episode_len_mean: 252.09
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52089999999999
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 4838
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638180918163724
          entropy_coeff: 0.009999999999999998
          kl: 0.008028587262663617
          policy_loss: 0.02021403056051996
          total_loss: 0.02392902962035603
          vf_explained_var: 0.06797806173563004
          vf_loss: 0.015244067429254453
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained: 13140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1314,57180,1314000,-2.5209,-2.14,-3.4,252.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-10-25_08-25-02
  done: false
  episode_len_mean: 252.95
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.52949999999999
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 4841
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.307397919230991
          entropy_coeff: 0.009999999999999998
          kl: 0.014889732629198705
          policy_loss: -0.09354679700401095
          total_loss: -0.09199545904994011
          vf_explained_var: 0.18065527081489563
          vf_loss: 0.012568367955585321
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained: 1315

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1315,57222,1315000,-2.5295,-2.14,-3.4,252.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-10-25_08-25-42
  done: false
  episode_len_mean: 254.35
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.543499999999989
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 4845
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.3759752247068617
          entropy_coeff: 0.009999999999999998
          kl: 0.01117522895527843
          policy_loss: 0.020714804695712196
          total_loss: 0.02320164855983522
          vf_explained_var: 0.10664860159158707
          vf_loss: 0.014702790344340935
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_trained: 1316

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1316,57262,1316000,-2.5435,-2.14,-3.4,254.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-10-25_08-26-21
  done: false
  episode_len_mean: 256.03
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5602999999999896
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 4848
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.441383073065016
          entropy_coeff: 0.009999999999999998
          kl: 0.012963552634242603
          policy_loss: -0.009850946896606021
          total_loss: -0.013985028366247813
          vf_explained_var: 0.249291330575943
          vf_loss: 0.008488892686242859
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1317,57301.4,1317000,-2.5603,-2.14,-3.4,256.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-10-25_08-27-01
  done: false
  episode_len_mean: 257.73
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5772999999999886
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 4852
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13814541395544058
          cur_lr: 5.000000000000001e-05
          entropy: 1.4707414282692803
          entropy_coeff: 0.009999999999999998
          kl: 0.024254869616279172
          policy_loss: 0.01169964729083909
          total_loss: 0.01485078272720178
          vf_explained_var: 0.24329276382923126
          vf_loss: 0.014507850549287266
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_trained: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1318,57341.4,1318000,-2.5773,-2.14,-3.4,257.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-10-25_08-27-41
  done: false
  episode_len_mean: 258.98
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5897999999999883
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 4855
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4550126168462965
          entropy_coeff: 0.009999999999999998
          kl: 0.009811473783712346
          policy_loss: 0.02909663054678175
          total_loss: 0.026220699730846616
          vf_explained_var: 0.1069583147764206
          vf_loss: 0.009641079228862913
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trained: 1319

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1319,57381,1319000,-2.5898,-2.14,-3.4,258.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-10-25_08-28-13
  done: false
  episode_len_mean: 262.32
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.623199999999988
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4858
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.6428651915656196
          entropy_coeff: 0.009999999999999998
          kl: 0.013009367516005751
          policy_loss: 0.04573464790980021
          total_loss: 0.04048969770471255
          vf_explained_var: 0.39215442538261414
          vf_loss: 0.008487927420194157
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_trained: 13200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1320,57412.9,1320000,-2.6232,-2.14,-3.78,262.32




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-10-25_08-29-10
  done: false
  episode_len_mean: 264.36
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.643599999999988
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4861
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.5825904779964024
          entropy_coeff: 0.009999999999999998
          kl: 0.011561977144898587
          policy_loss: -0.014318024781015183
          total_loss: -0.01935444242424435
          vf_explained_var: 0.33383747935295105
          vf_loss: 0.00839363767704021
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1321,57469.6,1321000,-2.6436,-2.14,-3.78,264.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-10-25_08-29-47
  done: false
  episode_len_mean: 266.48
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6647999999999867
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4864
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.511480540699429
          entropy_coeff: 0.009999999999999998
          kl: 0.008260057241256133
          policy_loss: -0.1221051154865159
          total_loss: -0.11952035220132934
          vf_explained_var: 0.13905765116214752
          vf_loss: 0.015987936966121198
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained: 1322

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1322,57506.8,1322000,-2.6648,-2.14,-3.78,266.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-10-25_08-30-24
  done: false
  episode_len_mean: 269.01
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6900999999999864
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4867
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.463636467191908
          entropy_coeff: 0.009999999999999998
          kl: 0.01174677238146222
          policy_loss: -0.12718829015890756
          total_loss: -0.12494274824857712
          vf_explained_var: 0.16406583786010742
          vf_loss: 0.014447761865125763
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained: 1323

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1323,57544.1,1323000,-2.6901,-2.14,-3.78,269.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-10-25_08-31-00
  done: false
  episode_len_mean: 271.63
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7162999999999853
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4870
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4543728152910869
          entropy_coeff: 0.009999999999999998
          kl: 0.011302516092993563
          policy_loss: -0.14235637113451957
          total_loss: -0.1399190156824059
          vf_explained_var: 0.1680057793855667
          vf_loss: 0.014638998576750358
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_trained: 1324

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1324,57579.6,1324000,-2.7163,-2.14,-3.78,271.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-10-25_08-31-34
  done: false
  episode_len_mean: 273.72
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7371999999999854
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4873
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4833144240909153
          entropy_coeff: 0.009999999999999998
          kl: 0.011447332082062495
          policy_loss: -0.13012298362122643
          total_loss: -0.12922104746103286
          vf_explained_var: 0.14209960401058197
          vf_loss: 0.013362985394067235
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1325,57613.5,1325000,-2.7372,-2.14,-3.78,273.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1326000
  custom_metrics: {}
  date: 2021-10-25_08-32-10
  done: false
  episode_len_mean: 274.68
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.746799999999985
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4876
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4162762509451972
          entropy_coeff: 0.009999999999999998
          kl: 0.015562024634694221
          policy_loss: -0.10604763353864352
          total_loss: -0.10364294648170472
          vf_explained_var: 0.13501407206058502
          vf_loss: 0.013342715437627502
    num_agent_steps_sampled: 1326000
    num_agent_steps_trained: 1326000
    num_steps_sampled: 1326000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1326,57649.8,1326000,-2.7468,-2.14,-3.78,274.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1327000
  custom_metrics: {}
  date: 2021-10-25_08-32-44
  done: false
  episode_len_mean: 277.61
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.776099999999984
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4879
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4881746768951416
          entropy_coeff: 0.009999999999999998
          kl: 0.01597865559366601
          policy_loss: -0.12026719897985458
          total_loss: -0.12096296764082379
          vf_explained_var: 0.26638263463974
          vf_loss: 0.010874909840317236
    num_agent_steps_sampled: 1327000
    num_agent_steps_trained: 1327000
    num_steps_sampled: 1327000
    num_steps_trained: 1327000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1327,57683.6,1327000,-2.7761,-2.14,-3.78,277.61


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1328000
  custom_metrics: {}
  date: 2021-10-25_08-33-18
  done: false
  episode_len_mean: 280.54
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8053999999999832
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4882
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4555255280600654
          entropy_coeff: 0.009999999999999998
          kl: 0.009646808011872407
          policy_loss: -0.08620372083452013
          total_loss: -0.0848723329603672
          vf_explained_var: 0.05367804691195488
          vf_loss: 0.013887648853576845
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_steps_sampled: 1328000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1328,57717.6,1328000,-2.8054,-2.14,-3.78,280.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1329000
  custom_metrics: {}
  date: 2021-10-25_08-33-55
  done: false
  episode_len_mean: 283.11
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8310999999999833
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4885
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.4023842824829948
          entropy_coeff: 0.009999999999999998
          kl: 0.013246749005654024
          policy_loss: -0.11242171161704594
          total_loss: -0.10940038396252526
          vf_explained_var: 0.07175354659557343
          vf_loss: 0.01430020754535993
    num_agent_steps_sampled: 1329000
    num_agent_steps_trained: 1329000
    num_steps_sampled: 1329000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1329,57754.2,1329000,-2.8311,-2.14,-3.78,283.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1330000
  custom_metrics: {}
  date: 2021-10-25_08-34-30
  done: false
  episode_len_mean: 284.61
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.846099999999983
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4888
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.323639616701338
          entropy_coeff: 0.009999999999999998
          kl: 0.014578509157063971
          policy_loss: -0.12214383251137204
          total_loss: -0.11822502762079239
          vf_explained_var: 0.09864942729473114
          vf_loss: 0.014134270728876193
    num_agent_steps_sampled: 1330000
    num_agent_steps_trained: 1330000
    num_steps_sampled: 1330000
    num_steps_trained: 1330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1330,57789.6,1330000,-2.8461,-2.14,-3.78,284.61




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1331000
  custom_metrics: {}
  date: 2021-10-25_08-35-27
  done: false
  episode_len_mean: 286.37
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.863699999999983
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4892
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.3005879428651599
          entropy_coeff: 0.009999999999999998
          kl: 0.009935578813833225
          policy_loss: 0.020364569044775432
          total_loss: 0.021081547687451043
          vf_explained_var: 0.16537928581237793
          vf_loss: 0.011664025040550365
    num_agent_steps_sampled: 1331000
    num_agent_steps_trained: 1331000
    num_steps_sampled: 1331000
    num_steps_trained: 133

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1331,57846.9,1331000,-2.8637,-2.14,-3.78,286.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1332000
  custom_metrics: {}
  date: 2021-10-25_08-36-10
  done: false
  episode_len_mean: 288.16
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.881599999999983
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4896
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.2161018795437284
          entropy_coeff: 0.009999999999999998
          kl: 0.006437166638384421
          policy_loss: -0.010884095148907768
          total_loss: -0.007442983281281259
          vf_explained_var: 0.14367422461509705
          vf_loss: 0.014268232561233971
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_steps_sampled: 1332000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1332,57889.4,1332000,-2.8816,-2.14,-3.78,288.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1333000
  custom_metrics: {}
  date: 2021-10-25_08-36-51
  done: false
  episode_len_mean: 288.4
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8839999999999826
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4899
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.14626368549135
          entropy_coeff: 0.009999999999999998
          kl: 0.010486357745733857
          policy_loss: 0.02749600691927804
          total_loss: 0.0283218781153361
          vf_explained_var: 0.11897873133420944
          vf_loss: 0.01011554492223594
    num_agent_steps_sampled: 1333000
    num_agent_steps_trained: 1333000
    num_steps_sampled: 1333000
    num_steps_trained: 1333000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1333,57930.1,1333000,-2.884,-2.14,-3.78,288.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1334000
  custom_metrics: {}
  date: 2021-10-25_08-37-31
  done: false
  episode_len_mean: 290.63
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9062999999999817
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4903
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.1104872359169855
          entropy_coeff: 0.009999999999999998
          kl: 0.011373862905668611
          policy_loss: -0.15712224808004166
          total_loss: -0.15437948736879561
          vf_explained_var: 0.5897068381309509
          vf_loss: 0.011490762430346675
    num_agent_steps_sampled: 1334000
    num_agent_steps_trained: 1334000
    num_steps_sampled: 1334000
    num_steps_trained: 1334

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1334,57970.3,1334000,-2.9063,-2.15,-3.78,290.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1335000
  custom_metrics: {}
  date: 2021-10-25_08-38-10
  done: false
  episode_len_mean: 291.93
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9192999999999825
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4906
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 0.9949046976036495
          entropy_coeff: 0.009999999999999998
          kl: 0.01986676392229943
          policy_loss: -0.028894258124960794
          total_loss: -0.02502977665927675
          vf_explained_var: 0.05341837555170059
          vf_loss: 0.00969677635261582
    num_agent_steps_sampled: 1335000
    num_agent_steps_trained: 1335000
    num_steps_sampled: 1335000
    num_steps_trained: 1335

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1335,58009.2,1335000,-2.9193,-2.15,-3.78,291.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1336000
  custom_metrics: {}
  date: 2021-10-25_08-38-48
  done: false
  episode_len_mean: 293.5
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9349999999999814
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4909
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 1.3112338337633345
          entropy_coeff: 0.009999999999999998
          kl: 0.02279620069343148
          policy_loss: 0.011759439773029751
          total_loss: 0.010895646777417925
          vf_explained_var: 0.14311201870441437
          vf_loss: 0.007524759464043503
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_steps_sampled: 1336000
    num_steps_trained: 13360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1336,58047.6,1336000,-2.935,-2.15,-3.78,293.5


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1337000
  custom_metrics: {}
  date: 2021-10-25_08-39-29
  done: false
  episode_len_mean: 294.78
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9477999999999804
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4913
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 1.2543632765611012
          entropy_coeff: 0.009999999999999998
          kl: 0.016148187615783627
          policy_loss: 0.010023997558487787
          total_loss: 0.011578937537140316
          vf_explained_var: 0.14698320627212524
          vf_loss: 0.00907927823977338
    num_agent_steps_sampled: 1337000
    num_agent_steps_trained: 1337000
    num_steps_sampled: 1337000
    num_steps_trained: 1337

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1337,58087.9,1337000,-2.9478,-2.15,-3.78,294.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1338000
  custom_metrics: {}
  date: 2021-10-25_08-40-08
  done: false
  episode_len_mean: 296.44
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.96439999999998
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4916
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 1.6799117154545253
          entropy_coeff: 0.009999999999999998
          kl: 0.013234344455753364
          policy_loss: 0.03872178196907043
          total_loss: 0.034988291064898175
          vf_explained_var: -0.17610915005207062
          vf_loss: 0.008952035795017663
    num_agent_steps_sampled: 1338000
    num_agent_steps_trained: 1338000
    num_steps_sampled: 1338000
    num_steps_trained: 13380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1338,58127,1338000,-2.9644,-2.15,-3.78,296.44




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1339000
  custom_metrics: {}
  date: 2021-10-25_08-41-01
  done: false
  episode_len_mean: 297.56
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9755999999999805
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4920
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 1.5482705526881748
          entropy_coeff: 0.009999999999999998
          kl: 0.011362759755278755
          policy_loss: -0.010694924617807071
          total_loss: -0.010234534781840112
          vf_explained_var: 0.14369356632232666
          vf_loss: 0.01241124157483379
    num_agent_steps_sampled: 1339000
    num_agent_steps_trained: 1339000
    num_steps_sampled: 1339000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1339,58180.3,1339000,-2.9756,-2.15,-3.78,297.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1340000
  custom_metrics: {}
  date: 2021-10-25_08-41-48
  done: false
  episode_len_mean: 297.78
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.9777999999999802
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4923
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 1.121350058582094
          entropy_coeff: 0.009999999999999998
          kl: 0.006312503008411518
          policy_loss: -0.01737194839451048
          total_loss: -0.01864175084564421
          vf_explained_var: 0.20864175260066986
          vf_loss: 0.007981603367564578
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_steps_sampled: 1340000
    num_steps_trained: 1340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1340,58226.8,1340000,-2.9778,-2.15,-3.78,297.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1341000
  custom_metrics: {}
  date: 2021-10-25_08-42-29
  done: false
  episode_len_mean: 298.07
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.98069999999998
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4927
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 1.0692492842674255
          entropy_coeff: 0.009999999999999998
          kl: 0.004896030603574925
          policy_loss: 0.03362543144159847
          total_loss: 0.03317081621951527
          vf_explained_var: 0.4053120017051697
          vf_loss: 0.008716053728454022
    num_agent_steps_sampled: 1341000
    num_agent_steps_trained: 1341000
    num_steps_sampled: 1341000
    num_steps_trained: 1341000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1341,58268.2,1341000,-2.9807,-2.15,-3.78,298.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1342000
  custom_metrics: {}
  date: 2021-10-25_08-43-09
  done: false
  episode_len_mean: 299.41
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.99409999999998
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 4931
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 1.33563898007075
          entropy_coeff: 0.009999999999999998
          kl: 0.01752916200275744
          policy_loss: -0.03272910250557794
          total_loss: -0.03131932814915975
          vf_explained_var: 0.43122947216033936
          vf_loss: 0.012041891666336192
    num_agent_steps_sampled: 1342000
    num_agent_steps_trained: 1342000
    num_steps_sampled: 1342000
    num_steps_trained: 1342000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1342,58308.3,1342000,-2.9941,-2.3,-3.78,299.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1343000
  custom_metrics: {}
  date: 2021-10-25_08-43-48
  done: false
  episode_len_mean: 299.65
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.9964999999999797
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 4934
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 1.1330223997433981
          entropy_coeff: 0.009999999999999998
          kl: 0.013004508021185263
          policy_loss: 0.04631199604935116
          total_loss: 0.04439938134617276
          vf_explained_var: -0.24233995378017426
          vf_loss: 0.007396528350525639
    num_agent_steps_sampled: 1343000
    num_agent_steps_trained: 1343000
    num_steps_sampled: 1343000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1343,58347.6,1343000,-2.9965,-2.27,-3.78,299.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1344000
  custom_metrics: {}
  date: 2021-10-25_08-44-22
  done: false
  episode_len_mean: 301.55
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -3.01549999999998
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 4937
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 1.431041967206531
          entropy_coeff: 0.009999999999999998
          kl: 0.01821323275937819
          policy_loss: -0.010143724746174282
          total_loss: -0.009733575251367357
          vf_explained_var: -0.37781381607055664
          vf_loss: 0.011889988762171318
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_steps_sampled: 1344000
    num_steps_trained: 134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1344,58381.3,1344000,-3.0155,-2.27,-4.02,301.55


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1345000
  custom_metrics: {}
  date: 2021-10-25_08-45-00
  done: false
  episode_len_mean: 303.44
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -3.0343999999999793
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 4940
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 1.4866659005482992
          entropy_coeff: 0.009999999999999998
          kl: 0.023738276473583633
          policy_loss: 0.058205487661891514
          total_loss: 0.05357807270354695
          vf_explained_var: 0.1911439448595047
          vf_loss: 0.006549995442199159
    num_agent_steps_sampled: 1345000
    num_agent_steps_trained: 1345000
    num_steps_sampled: 1345000
    num_steps_trained: 1345

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1345,58419.5,1345000,-3.0344,-2.27,-4.02,303.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1346000
  custom_metrics: {}
  date: 2021-10-25_08-45-42
  done: false
  episode_len_mean: 303.3
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -3.032999999999979
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4944
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23312038604980592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3333979262246025
          entropy_coeff: 0.009999999999999998
          kl: 0.02134054970707674
          policy_loss: 0.022022178851895863
          total_loss: 0.0253108239836163
          vf_explained_var: 0.1578492373228073
          vf_loss: 0.011647704750713375
    num_agent_steps_sampled: 1346000
    num_agent_steps_trained: 1346000
    num_steps_sampled: 1346000
    num_steps_trained: 1346000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1346,58461.6,1346000,-3.033,-2.27,-4.02,303.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1347000
  custom_metrics: {}
  date: 2021-10-25_08-46-26
  done: false
  episode_len_mean: 301.81
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -3.018099999999979
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4948
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.0605336480670504
          entropy_coeff: 0.009999999999999998
          kl: 0.006162791688909566
          policy_loss: -0.009665257276760207
          total_loss: -0.005916655105021265
          vf_explained_var: 0.15817046165466309
          vf_loss: 0.012198931858357456
    num_agent_steps_sampled: 1347000
    num_agent_steps_trained: 1347000
    num_steps_sampled: 1347000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1347,58504.9,1347000,-3.0181,-2.27,-4.02,301.81




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1348000
  custom_metrics: {}
  date: 2021-10-25_08-47-29
  done: false
  episode_len_mean: 300.7
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -3.0069999999999792
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4952
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.3120463530222575
          entropy_coeff: 0.009999999999999998
          kl: 0.0076391672682051045
          policy_loss: -0.005204183028803932
          total_loss: -0.003025088790390227
          vf_explained_var: 0.1617877185344696
          vf_loss: 0.012628288670546479
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_steps_sampled: 1348000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1348,58567.8,1348000,-3.007,-2.27,-4.02,300.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1349000
  custom_metrics: {}
  date: 2021-10-25_08-48-13
  done: false
  episode_len_mean: 299.56
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.9955999999999796
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 4955
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.136001952489217
          entropy_coeff: 0.009999999999999998
          kl: 0.00731042165167237
          policy_loss: -0.06885514019264115
          total_loss: -0.06680948941244019
          vf_explained_var: 0.12209849804639816
          vf_loss: 0.010849356040772465
    num_agent_steps_sampled: 1349000
    num_agent_steps_trained: 1349000
    num_steps_sampled: 1349000
    num_steps_trained: 13490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1349,58612.4,1349000,-2.9956,-2.27,-4.02,299.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1350000
  custom_metrics: {}
  date: 2021-10-25_08-48-57
  done: false
  episode_len_mean: 295.77
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.957699999999981
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4959
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.2726551307572258
          entropy_coeff: 0.009999999999999998
          kl: 0.01008793693894153
          policy_loss: -0.023003249698215062
          total_loss: -0.020642079330152937
          vf_explained_var: 0.3107777237892151
          vf_loss: 0.011560162539697355
    num_agent_steps_sampled: 1350000
    num_agent_steps_trained: 1350000
    num_steps_sampled: 1350000
    num_steps_trained: 1350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1350,58655.7,1350000,-2.9577,-2.27,-4.02,295.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1351000
  custom_metrics: {}
  date: 2021-10-25_08-49-38
  done: false
  episode_len_mean: 294.57
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.9456999999999804
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4963
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.412200997935401
          entropy_coeff: 0.009999999999999998
          kl: 0.006472512136336188
          policy_loss: 0.025229460332128736
          total_loss: 0.02547454552517997
          vf_explained_var: 0.17795221507549286
          vf_loss: 0.012103786526454819
    num_agent_steps_sampled: 1351000
    num_agent_steps_trained: 1351000
    num_steps_sampled: 1351000
    num_steps_trained: 13510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1351,58697.1,1351000,-2.9457,-2.27,-4.02,294.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1352000
  custom_metrics: {}
  date: 2021-10-25_08-50-19
  done: false
  episode_len_mean: 292.41
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.924099999999981
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4967
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.3316332512431674
          entropy_coeff: 0.009999999999999998
          kl: 0.008165531121896583
          policy_loss: 0.002939402891529931
          total_loss: 0.005190176847908232
          vf_explained_var: 0.12987568974494934
          vf_loss: 0.01271177666882674
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_steps_sampled: 1352000
    num_steps_trained: 13520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1352,58738.2,1352000,-2.9241,-2.27,-4.02,292.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1353000
  custom_metrics: {}
  date: 2021-10-25_08-51-03
  done: false
  episode_len_mean: 290.66
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.9065999999999814
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 4970
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.3757780604892307
          entropy_coeff: 0.009999999999999998
          kl: 0.0074092370950771015
          policy_loss: -0.04621590885023276
          total_loss: -0.04720509553121196
          vf_explained_var: 0.1105976328253746
          vf_loss: 0.010177726381354862
    num_agent_steps_sampled: 1353000
    num_agent_steps_trained: 1353000
    num_steps_sampled: 1353000
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1353,58781.7,1353000,-2.9066,-2.27,-4.02,290.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1354000
  custom_metrics: {}
  date: 2021-10-25_08-51-43
  done: false
  episode_len_mean: 288.9
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.888999999999981
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4974
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.3410336560673184
          entropy_coeff: 0.009999999999999998
          kl: 0.007229972820449859
          policy_loss: 0.04378910279936261
          total_loss: 0.04534529091583358
          vf_explained_var: 0.29985561966896057
          vf_loss: 0.01243834247191747
    num_agent_steps_sampled: 1354000
    num_agent_steps_trained: 1354000
    num_steps_sampled: 1354000
    num_steps_trained: 1354000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1354,58821.7,1354000,-2.889,-2.27,-4.02,288.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1355000
  custom_metrics: {}
  date: 2021-10-25_08-52-24
  done: false
  episode_len_mean: 285.83
  episode_media: {}
  episode_reward_max: -2.2699999999999956
  episode_reward_mean: -2.858299999999982
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4978
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.235408620039622
          entropy_coeff: 0.009999999999999998
          kl: 0.008947835720960976
          policy_loss: 0.0038675686551464927
          total_loss: 0.0083522273434533
          vf_explained_var: 0.0779276043176651
          vf_loss: 0.013709856559418969
    num_agent_steps_sampled: 1355000
    num_agent_steps_trained: 1355000
    num_steps_sampled: 1355000
    num_steps_trained: 1355000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1355,58862.9,1355000,-2.8583,-2.27,-4.02,285.83




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1356000
  custom_metrics: {}
  date: 2021-10-25_08-53-28
  done: false
  episode_len_mean: 281.94
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8193999999999835
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4982
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.2430991835064358
          entropy_coeff: 0.009999999999999998
          kl: 0.007458364493841114
          policy_loss: 0.012507296105225881
          total_loss: 0.016615823490752114
          vf_explained_var: 0.10378485918045044
          vf_loss: 0.013931475083033244
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_steps_sampled: 1356000
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1356,58926.4,1356000,-2.8194,-2.22,-4.02,281.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1357000
  custom_metrics: {}
  date: 2021-10-25_08-54-12
  done: false
  episode_len_mean: 279.93
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.799299999999984
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 4985
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.1609590768814086
          entropy_coeff: 0.009999999999999998
          kl: 0.005334371790217318
          policy_loss: -0.10085570050610436
          total_loss: -0.09684383504920535
          vf_explained_var: 0.1044716015458107
          vf_loss: 0.013756128390216164
    num_agent_steps_sampled: 1357000
    num_agent_steps_trained: 1357000
    num_steps_sampled: 1357000
    num_steps_trained: 13570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1357,58970.6,1357000,-2.7993,-2.22,-4.02,279.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1358000
  custom_metrics: {}
  date: 2021-10-25_08-54-56
  done: false
  episode_len_mean: 277.85
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.778499999999984
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4989
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.1512604037920633
          entropy_coeff: 0.009999999999999998
          kl: 0.007207886465561349
          policy_loss: -0.01417227801349428
          total_loss: -0.012262424495485093
          vf_explained_var: 0.11433674395084381
          vf_loss: 0.010902000454047487
    num_agent_steps_sampled: 1358000
    num_agent_steps_trained: 1358000
    num_steps_sampled: 1358000
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1358,59014.4,1358000,-2.7785,-2.22,-4.02,277.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1359000
  custom_metrics: {}
  date: 2021-10-25_08-55-43
  done: false
  episode_len_mean: 276.18
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.761799999999985
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 4993
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 1.0196227378315397
          entropy_coeff: 0.009999999999999998
          kl: 0.010081633051874503
          policy_loss: -0.12968404392401378
          total_loss: -0.11756090331408713
          vf_explained_var: 0.1125398650765419
          vf_loss: 0.018794017140236166
    num_agent_steps_sampled: 1359000
    num_agent_steps_trained: 1359000
    num_steps_sampled: 1359000
    num_steps_trained: 13590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1359,59061.6,1359000,-2.7618,-2.22,-4.02,276.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1360000
  custom_metrics: {}
  date: 2021-10-25_08-56-27
  done: false
  episode_len_mean: 274.41
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7440999999999853
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 5
  episodes_total: 4998
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 0.953405644496282
          entropy_coeff: 0.009999999999999998
          kl: 0.0070412093192689755
          policy_loss: -0.028739996833933724
          total_loss: -0.020006646629836825
          vf_explained_var: 0.13227993249893188
          vf_loss: 0.01580523041387399
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_steps_sampled: 1360000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1360,59105.3,1360000,-2.7441,-2.22,-4.02,274.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1361000
  custom_metrics: {}
  date: 2021-10-25_08-57-12
  done: false
  episode_len_mean: 273.03
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7302999999999855
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 3
  episodes_total: 5001
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3496805790747089
          cur_lr: 5.000000000000001e-05
          entropy: 0.9861486646864149
          entropy_coeff: 0.009999999999999998
          kl: 0.0049030292756008795
          policy_loss: -0.0728968614505397
          total_loss: -0.06712075678838623
          vf_explained_var: 0.1910434514284134
          vf_loss: 0.013923097029328346
    num_agent_steps_sampled: 1361000
    num_agent_steps_trained: 1361000
    num_steps_sampled: 1361000
    num_steps_trained: 1361

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1361,59150.7,1361000,-2.7303,-2.22,-4.02,273.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1362000
  custom_metrics: {}
  date: 2021-10-25_08-57-58
  done: false
  episode_len_mean: 272.37
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.723699999999986
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5005
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.8956069270769755
          entropy_coeff: 0.009999999999999998
          kl: 0.014632764643206998
          policy_loss: -0.11865109296308624
          total_loss: -0.10980772284997833
          vf_explained_var: 0.2311536818742752
          vf_loss: 0.015241043171327975
    num_agent_steps_sampled: 1362000
    num_agent_steps_trained: 1362000
    num_steps_sampled: 1362000
    num_steps_trained: 1362

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1362,59196.7,1362000,-2.7237,-2.22,-4.02,272.37




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1363000
  custom_metrics: {}
  date: 2021-10-25_08-59-00
  done: false
  episode_len_mean: 268.98
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.689799999999987
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 5
  episodes_total: 5010
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.9703231685691409
          entropy_coeff: 0.009999999999999998
          kl: 0.009351106561070013
          policy_loss: -0.011853862388266457
          total_loss: -0.0048533720274766285
          vf_explained_var: 0.19067247211933136
          vf_loss: 0.015068770396626657
    num_agent_steps_sampled: 1363000
    num_agent_steps_trained: 1363000
    num_steps_sampled: 1363000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1363,59258.4,1363000,-2.6898,-2.05,-4.02,268.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1364000
  custom_metrics: {}
  date: 2021-10-25_08-59-49
  done: false
  episode_len_mean: 266.74
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.667399999999987
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5014
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.9706058661142986
          entropy_coeff: 0.009999999999999998
          kl: 0.0055192977786853075
          policy_loss: 0.02789794761273596
          total_loss: 0.03149351196156608
          vf_explained_var: 0.1137160211801529
          vf_loss: 0.012336623140921195
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_steps_sampled: 1364000
    num_steps_trained: 13640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1364,59307.4,1364000,-2.6674,-2.05,-4.02,266.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1365000
  custom_metrics: {}
  date: 2021-10-25_09-00-35
  done: false
  episode_len_mean: 264.67
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.646699999999987
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5018
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 1.0190600944889916
          entropy_coeff: 0.009999999999999998
          kl: 0.008193241528466426
          policy_loss: 0.012278285788165198
          total_loss: 0.015762658417224885
          vf_explained_var: 0.09841667860746384
          vf_loss: 0.012242463479439417
    num_agent_steps_sampled: 1365000
    num_agent_steps_trained: 1365000
    num_steps_sampled: 1365000
    num_steps_trained: 136

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1365,59353.9,1365000,-2.6467,-2.05,-4.02,264.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1366000
  custom_metrics: {}
  date: 2021-10-25_09-01-20
  done: false
  episode_len_mean: 263.97
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.639699999999987
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5022
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 1.0239194399780698
          entropy_coeff: 0.009999999999999998
          kl: 0.0059130704315847995
          policy_loss: 0.018584596034553317
          total_loss: 0.022413389550315008
          vf_explained_var: 0.07661715894937515
          vf_loss: 0.013034144499235683
    num_agent_steps_sampled: 1366000
    num_agent_steps_trained: 1366000
    num_steps_sampled: 1366000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1366,59398.9,1366000,-2.6397,-2.05,-4.02,263.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1367000
  custom_metrics: {}
  date: 2021-10-25_09-02-06
  done: false
  episode_len_mean: 263.38
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.633799999999987
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5026
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.9667807108826107
          entropy_coeff: 0.009999999999999998
          kl: 0.005685271436922948
          policy_loss: 0.011292366766267353
          total_loss: 0.015873346477746964
          vf_explained_var: 0.08583919703960419
          vf_loss: 0.013254769850108358
    num_agent_steps_sampled: 1367000
    num_agent_steps_trained: 1367000
    num_steps_sampled: 1367000
    num_steps_trained: 136

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1367,59445,1367000,-2.6338,-2.05,-4.02,263.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1368000
  custom_metrics: {}
  date: 2021-10-25_09-02-51
  done: false
  episode_len_mean: 260.89
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.608899999999988
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5030
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.9839038829008738
          entropy_coeff: 0.009999999999999998
          kl: 0.0045919250520253425
          policy_loss: -0.0026712964806291793
          total_loss: 0.0016602531903319889
          vf_explained_var: 0.07254839688539505
          vf_loss: 0.013367734601100285
    num_agent_steps_sampled: 1368000
    num_agent_steps_trained: 1368000
    num_steps_sampled: 1368000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1368,59489.8,1368000,-2.6089,-2.05,-4.02,260.89


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1369000
  custom_metrics: {}
  date: 2021-10-25_09-03-38
  done: false
  episode_len_mean: 259.96
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5995999999999877
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 4
  episodes_total: 5034
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08742014476867722
          cur_lr: 5.000000000000001e-05
          entropy: 0.9882277952300178
          entropy_coeff: 0.009999999999999998
          kl: 0.0046740689540942845
          policy_loss: -0.01644183811214235
          total_loss: -0.01249954675634702
          vf_explained_var: 0.07225216925144196
          vf_loss: 0.013415962664617432
    num_agent_steps_sampled: 1369000
    num_agent_steps_trained: 1369000
    num_steps_sampled: 1369000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1369,59536.6,1369000,-2.5996,-2.05,-4.02,259.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1370000
  custom_metrics: {}
  date: 2021-10-25_09-04-25
  done: false
  episode_len_mean: 255.57
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5556999999999888
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5038
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04371007238433861
          cur_lr: 5.000000000000001e-05
          entropy: 0.9005550622940064
          entropy_coeff: 0.009999999999999998
          kl: 0.023816526770496674
          policy_loss: -0.13904199649890264
          total_loss: -0.1301994551387098
          vf_explained_var: 0.08984916657209396
          vf_loss: 0.01680707023996446
    num_agent_steps_sampled: 1370000
    num_agent_steps_trained: 1370000
    num_steps_sampled: 1370000
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1370,59583.4,1370000,-2.5557,-2.05,-3.6,255.57




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1371000
  custom_metrics: {}
  date: 2021-10-25_09-05-30
  done: false
  episode_len_mean: 252.72
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.52719999999999
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 5043
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.9360105693340302
          entropy_coeff: 0.009999999999999998
          kl: 0.00871648716889233
          policy_loss: 0.007082402457793554
          total_loss: 0.013629760924312803
          vf_explained_var: 0.1052032858133316
          vf_loss: 0.015335963956183857
    num_agent_steps_sampled: 1371000
    num_agent_steps_trained: 1371000
    num_steps_sampled: 1371000
    num_steps_trained: 13710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1371,59648.2,1371000,-2.5272,-2.05,-3.41,252.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1372000
  custom_metrics: {}
  date: 2021-10-25_09-06-17
  done: false
  episode_len_mean: 251.92
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.51919999999999
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 5047
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.9604531248410543
          entropy_coeff: 0.009999999999999998
          kl: 0.008392573343643997
          policy_loss: 0.011707239598035813
          total_loss: 0.015993285924196243
          vf_explained_var: 0.10103893280029297
          vf_loss: 0.01334031607127852
    num_agent_steps_sampled: 1372000
    num_agent_steps_trained: 1372000
    num_steps_sampled: 1372000
    num_steps_trained: 1372

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1372,59695.4,1372000,-2.5192,-2.05,-3.41,251.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1373000
  custom_metrics: {}
  date: 2021-10-25_09-07-04
  done: false
  episode_len_mean: 251.15
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.51149999999999
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 5051
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.9464557919237349
          entropy_coeff: 0.009999999999999998
          kl: 0.007968121149458322
          policy_loss: 0.027374925629960165
          total_loss: 0.03123499399258031
          vf_explained_var: 0.09754446893930435
          vf_loss: 0.012802194948825571
    num_agent_steps_sampled: 1373000
    num_agent_steps_trained: 1373000
    num_steps_sampled: 1373000
    num_steps_trained: 1373

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1373,59742.8,1373000,-2.5115,-2.05,-3.41,251.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1374000
  custom_metrics: {}
  date: 2021-10-25_09-07-53
  done: false
  episode_len_mean: 250.27
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5026999999999906
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 5055
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.9274790141317579
          entropy_coeff: 0.009999999999999998
          kl: 0.011202676469291519
          policy_loss: -0.014286914385027355
          total_loss: -0.009545353800058365
          vf_explained_var: 0.1058604046702385
          vf_loss: 0.013281846853593985
    num_agent_steps_sampled: 1374000
    num_agent_steps_trained: 1374000
    num_steps_sampled: 1374000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1374,59791.6,1374000,-2.5027,-2.05,-3.41,250.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1375000
  custom_metrics: {}
  date: 2021-10-25_09-08-39
  done: false
  episode_len_mean: 248.77
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4876999999999905
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 5060
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.9407450828287337
          entropy_coeff: 0.009999999999999998
          kl: 0.014763034600327766
          policy_loss: -0.0293882231745455
          total_loss: -0.020975552250941595
          vf_explained_var: 0.1183178722858429
          vf_loss: 0.016852176862044467
    num_agent_steps_sampled: 1375000
    num_agent_steps_trained: 1375000
    num_steps_sampled: 1375000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1375,59837.6,1375000,-2.4877,-2.05,-3.41,248.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1376000
  custom_metrics: {}
  date: 2021-10-25_09-09-29
  done: false
  episode_len_mean: 247.49
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.474899999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 5064
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.0333950771225824
          entropy_coeff: 0.009999999999999998
          kl: 0.016789878177885638
          policy_loss: 0.005087409334050284
          total_loss: 0.008225241262051795
          vf_explained_var: 0.23759007453918457
          vf_loss: 0.012370951421972778
    num_agent_steps_sampled: 1376000
    num_agent_steps_trained: 1376000
    num_steps_sampled: 1376000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1376,59887.1,1376000,-2.4749,-2.05,-3.41,247.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1377000
  custom_metrics: {}
  date: 2021-10-25_09-10-18
  done: false
  episode_len_mean: 245.83
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4582999999999915
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 5068
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.0319533089796702
          entropy_coeff: 0.009999999999999998
          kl: 0.012506408201712955
          policy_loss: 0.01964638390474849
          total_loss: 0.02170396637585428
          vf_explained_var: 0.21762818098068237
          vf_loss: 0.011557131871167156
    num_agent_steps_sampled: 1377000
    num_agent_steps_trained: 1377000
    num_steps_sampled: 1377000
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1377,59936,1377000,-2.4583,-2.05,-3.41,245.83




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1378000
  custom_metrics: {}
  date: 2021-10-25_09-11-25
  done: false
  episode_len_mean: 244.4
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.443999999999991
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 5073
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.0184178875552283
          entropy_coeff: 0.009999999999999998
          kl: 0.014187992081554373
          policy_loss: -0.03693272533516089
          total_loss: -0.029873046982619497
          vf_explained_var: 0.2492573857307434
          vf_loss: 0.016313618193897934
    num_agent_steps_sampled: 1378000
    num_agent_steps_trained: 1378000
    num_steps_sampled: 1378000
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1378,60002.9,1378000,-2.444,-2.05,-3.41,244.4


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1379000
  custom_metrics: {}
  date: 2021-10-25_09-12-11
  done: false
  episode_len_mean: 242.78
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.427799999999992
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5077
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.073796682887607
          entropy_coeff: 0.009999999999999998
          kl: 0.017065227153454682
          policy_loss: 0.002035987377166748
          total_loss: 0.0047569062974717885
          vf_explained_var: 0.2855670750141144
          vf_loss: 0.012340002776020103
    num_agent_steps_sampled: 1379000
    num_agent_steps_trained: 1379000
    num_steps_sampled: 1379000
    num_steps_trained: 1379

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1379,60049.4,1379000,-2.4278,-2.05,-2.95,242.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1380000
  custom_metrics: {}
  date: 2021-10-25_09-12-57
  done: false
  episode_len_mean: 242.75
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.427499999999992
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5081
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.1504143231444888
          entropy_coeff: 0.009999999999999998
          kl: 0.018409657825262906
          policy_loss: 0.007326633648739921
          total_loss: 0.008660752450426419
          vf_explained_var: 0.3003564774990082
          vf_loss: 0.011631233400354784
    num_agent_steps_sampled: 1380000
    num_agent_steps_trained: 1380000
    num_steps_sampled: 1380000
    num_steps_trained: 1380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1380,60095.4,1380000,-2.4275,-2.05,-2.95,242.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1381000
  custom_metrics: {}
  date: 2021-10-25_09-13-44
  done: false
  episode_len_mean: 241.98
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4197999999999924
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5085
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.1914053294393752
          entropy_coeff: 0.009999999999999998
          kl: 0.01868176919388126
          policy_loss: 0.015106157379017935
          total_loss: 0.015777862030598853
          vf_explained_var: 0.32835859060287476
          vf_loss: 0.011360886061771049
    num_agent_steps_sampled: 1381000
    num_agent_steps_trained: 1381000
    num_steps_sampled: 1381000
    num_steps_trained: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1381,60142.1,1381000,-2.4198,-2.05,-2.95,241.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1382000
  custom_metrics: {}
  date: 2021-10-25_09-14-30
  done: false
  episode_len_mean: 241.27
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.412699999999992
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5089
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.1356895367304485
          entropy_coeff: 0.009999999999999998
          kl: 0.009504672375397642
          policy_loss: 0.022708699024385878
          total_loss: 0.024795357137918472
          vf_explained_var: 0.26040130853652954
          vf_loss: 0.012820379725760883
    num_agent_steps_sampled: 1382000
    num_agent_steps_trained: 1382000
    num_steps_sampled: 1382000
    num_steps_trained: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1382,60188.3,1382000,-2.4127,-2.05,-2.95,241.27


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1383000
  custom_metrics: {}
  date: 2021-10-25_09-15-17
  done: false
  episode_len_mean: 241.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4113999999999924
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5093
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 1.1483912838829888
          entropy_coeff: 0.009999999999999998
          kl: 0.025867212608840697
          policy_loss: 0.021822036306063334
          total_loss: 0.02454060340921084
          vf_explained_var: 0.25946661829948425
          vf_loss: 0.012506492032359044
    num_agent_steps_sampled: 1383000
    num_agent_steps_trained: 1383000
    num_steps_sampled: 1383000
    num_steps_trained: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1383,60235.2,1383000,-2.4114,-2.05,-2.95,241.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1384000
  custom_metrics: {}
  date: 2021-10-25_09-16-03
  done: false
  episode_len_mean: 241.56
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4155999999999924
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5097
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09834766286476193
          cur_lr: 5.000000000000001e-05
          entropy: 1.1938506338331434
          entropy_coeff: 0.009999999999999998
          kl: 0.03136702781505208
          policy_loss: -0.009139904917942154
          total_loss: -0.006022256861130396
          vf_explained_var: 0.35770347714424133
          vf_loss: 0.011971280609981882
    num_agent_steps_sampled: 1384000
    num_agent_steps_trained: 1384000
    num_steps_sampled: 1384000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1384,60281.4,1384000,-2.4156,-2.05,-2.95,241.56




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1385000
  custom_metrics: {}
  date: 2021-10-25_09-17-07
  done: false
  episode_len_mean: 241.25
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4124999999999925
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 5101
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1475214942971428
          cur_lr: 5.000000000000001e-05
          entropy: 1.1121158599853516
          entropy_coeff: 0.009999999999999998
          kl: 0.010631951632819497
          policy_loss: 0.01570803187787533
          total_loss: 0.01945151591466533
          vf_explained_var: 0.1880006343126297
          vf_loss: 0.013296202621940109
    num_agent_steps_sampled: 1385000
    num_agent_steps_trained: 1385000
    num_steps_sampled: 1385000
    num_steps_trained: 138500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1385,60344.5,1385000,-2.4125,-2.05,-2.95,241.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1386000
  custom_metrics: {}
  date: 2021-10-25_09-17-54
  done: false
  episode_len_mean: 241.04
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4103999999999925
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 5105
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1475214942971428
          cur_lr: 5.000000000000001e-05
          entropy: 1.1892060187127855
          entropy_coeff: 0.009999999999999998
          kl: 0.008153828924931735
          policy_loss: 0.023313125140137142
          total_loss: 0.024987755881415472
          vf_explained_var: 0.1900404840707779
          vf_loss: 0.012363826856017113
    num_agent_steps_sampled: 1386000
    num_agent_steps_trained: 1386000
    num_steps_sampled: 1386000
    num_steps_trained: 1386

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1386,60391.5,1386000,-2.4104,-2.05,-2.77,241.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1387000
  custom_metrics: {}
  date: 2021-10-25_09-18-36
  done: false
  episode_len_mean: 241.72
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4171999999999922
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 5109
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1475214942971428
          cur_lr: 5.000000000000001e-05
          entropy: 1.3099035461743673
          entropy_coeff: 0.009999999999999998
          kl: 0.02811242042944018
          policy_loss: 0.03237539484269089
          total_loss: 0.03427609586053425
          vf_explained_var: 0.20692507922649384
          vf_loss: 0.010852545717110236
    num_agent_steps_sampled: 1387000
    num_agent_steps_trained: 1387000
    num_steps_sampled: 1387000
    num_steps_trained: 13870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1387,60434.2,1387000,-2.4172,-2.05,-3.03,241.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1388000
  custom_metrics: {}
  date: 2021-10-25_09-19-21
  done: false
  episode_len_mean: 242.84
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.428399999999992
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 5113
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.0450895216729905
          entropy_coeff: 0.009999999999999998
          kl: 0.007663662710212233
          policy_loss: 0.012022360331482358
          total_loss: 0.015836233645677565
          vf_explained_var: 0.21791990101337433
          vf_loss: 0.012568935162077348
    num_agent_steps_sampled: 1388000
    num_agent_steps_trained: 1388000
    num_steps_sampled: 1388000
    num_steps_trained: 1388000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1388,60479.3,1388000,-2.4284,-2.06,-3.03,242.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1389000
  custom_metrics: {}
  date: 2021-10-25_09-20-07
  done: false
  episode_len_mean: 243.02
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.430199999999992
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 5117
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.0518327891826629
          entropy_coeff: 0.009999999999999998
          kl: 0.005614657134201906
          policy_loss: 0.0267692887948619
          total_loss: 0.030242816358804703
          vf_explained_var: 0.18522535264492035
          vf_loss: 0.012749433941725228
    num_agent_steps_sampled: 1389000
    num_agent_steps_trained: 1389000
    num_steps_sampled: 1389000
    num_steps_trained: 1389000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1389,60525.2,1389000,-2.4302,-2.06,-3.03,243.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1390000
  custom_metrics: {}
  date: 2021-10-25_09-20-51
  done: false
  episode_len_mean: 243.02
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.430199999999992
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 5121
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.031502072016398
          entropy_coeff: 0.009999999999999998
          kl: 0.008981780695199544
          policy_loss: 0.026377052317063013
          total_loss: 0.03118377708726459
          vf_explained_var: 0.18300506472587585
          vf_loss: 0.013134235371318128
    num_agent_steps_sampled: 1390000
    num_agent_steps_trained: 1390000
    num_steps_sampled: 1390000
    num_steps_trained: 1390000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1390,60569.3,1390000,-2.4302,-2.06,-3.03,243.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1391000
  custom_metrics: {}
  date: 2021-10-25_09-21-36
  done: false
  episode_len_mean: 243.84
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4383999999999917
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5125
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.3498109181722004
          entropy_coeff: 0.009999999999999998
          kl: 0.010762377670513286
          policy_loss: -0.006212276882595486
          total_loss: -0.0027150289879904854
          vf_explained_var: 0.1505814492702484
          vf_loss: 0.014613831974565982
    num_agent_steps_sampled: 1391000
    num_agent_steps_trained: 1391000
    num_steps_sampled: 1391000
    num_steps_trained: 1391000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1391,60613.5,1391000,-2.4384,-2.06,-3.12,243.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1392000
  custom_metrics: {}
  date: 2021-10-25_09-22-21
  done: false
  episode_len_mean: 244.08
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.440799999999992
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5129
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.112179935640759
          entropy_coeff: 0.009999999999999998
          kl: 0.007221137741830969
          policy_loss: -0.00671930644247267
          total_loss: -0.003524611724747552
          vf_explained_var: 0.3160122036933899
          vf_loss: 0.012718583586522274
    num_agent_steps_sampled: 1392000
    num_agent_steps_trained: 1392000
    num_steps_sampled: 1392000
    num_steps_trained: 1392000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1392,60658.7,1392000,-2.4408,-2.06,-3.12,244.08




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1393000
  custom_metrics: {}
  date: 2021-10-25_09-23-23
  done: false
  episode_len_mean: 244.28
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4427999999999916
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5133
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.2493522776497734
          entropy_coeff: 0.009999999999999998
          kl: 0.013630253306705085
          policy_loss: -0.022084346579180823
          total_loss: -0.01874531474378374
          vf_explained_var: 0.27015045285224915
          vf_loss: 0.012816420156094762
    num_agent_steps_sampled: 1393000
    num_agent_steps_trained: 1393000
    num_steps_sampled: 1393000
    num_steps_trained: 1393000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1393,60721,1393000,-2.4428,-2.06,-3.12,244.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1394000
  custom_metrics: {}
  date: 2021-10-25_09-24-05
  done: false
  episode_len_mean: 245.49
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4548999999999914
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 5136
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.4564027773009407
          entropy_coeff: 0.009999999999999998
          kl: 0.014441857027780083
          policy_loss: 0.017690856009721756
          total_loss: 0.015490608000093036
          vf_explained_var: 0.37898895144462585
          vf_loss: 0.009168054190942914
    num_agent_steps_sampled: 1394000
    num_agent_steps_trained: 1394000
    num_steps_sampled: 1394000
    num_steps_trained: 1394000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1394,60762.4,1394000,-2.4549,-2.06,-3.32,245.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1395000
  custom_metrics: {}
  date: 2021-10-25_09-24-49
  done: false
  episode_len_mean: 246.52
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4651999999999914
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5140
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.2314383837911818
          entropy_coeff: 0.009999999999999998
          kl: 0.0060999668831144835
          policy_loss: 0.013841431753502952
          total_loss: 0.01588217309779591
          vf_explained_var: 0.2736002504825592
          vf_loss: 0.013005308248102665
    num_agent_steps_sampled: 1395000
    num_agent_steps_trained: 1395000
    num_steps_sampled: 1395000
    num_steps_trained: 1395000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1395,60806.8,1395000,-2.4652,-2.06,-3.32,246.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1396000
  custom_metrics: {}
  date: 2021-10-25_09-25-34
  done: false
  episode_len_mean: 247.09
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.470899999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5144
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.3320102585686577
          entropy_coeff: 0.009999999999999998
          kl: 0.008782473269371848
          policy_loss: 0.01031207645104991
          total_loss: 0.011797523332966698
          vf_explained_var: 0.3559728264808655
          vf_loss: 0.012862144700355
    num_agent_steps_sampled: 1396000
    num_agent_steps_trained: 1396000
    num_steps_sampled: 1396000
    num_steps_trained: 1396000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1396,60852.1,1396000,-2.4709,-2.06,-3.32,247.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1397000
  custom_metrics: {}
  date: 2021-10-25_09-26-19
  done: false
  episode_len_mean: 248.36
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4835999999999907
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5148
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.3374678638246325
          entropy_coeff: 0.009999999999999998
          kl: 0.00851775554985951
          policy_loss: -0.01575211998489168
          total_loss: -0.014654000765747494
          vf_explained_var: 0.3871680498123169
          vf_loss: 0.01258796848770645
    num_agent_steps_sampled: 1397000
    num_agent_steps_trained: 1397000
    num_steps_sampled: 1397000
    num_steps_trained: 1397000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1397,60896.2,1397000,-2.4836,-2.06,-3.32,248.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1398000
  custom_metrics: {}
  date: 2021-10-25_09-27-03
  done: false
  episode_len_mean: 248.87
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.488699999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5152
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.2326956298616198
          entropy_coeff: 0.009999999999999998
          kl: 0.012877177763305762
          policy_loss: -0.00568273551762104
          total_loss: -0.0028910336809025872
          vf_explained_var: 0.3554920256137848
          vf_loss: 0.012269166205078364
    num_agent_steps_sampled: 1398000
    num_agent_steps_trained: 1398000
    num_steps_sampled: 1398000
    num_steps_trained: 1398000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1398,60941,1398000,-2.4887,-2.06,-3.32,248.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1399000
  custom_metrics: {}
  date: 2021-10-25_09-27-50
  done: false
  episode_len_mean: 249.55
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.495499999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5156
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.15964761045244
          entropy_coeff: 0.009999999999999998
          kl: 0.006609911232777993
          policy_loss: -0.0074328377842903135
          total_loss: -0.005000980198383331
          vf_explained_var: 0.2820592224597931
          vf_loss: 0.012565680893345012
    num_agent_steps_sampled: 1399000
    num_agent_steps_trained: 1399000
    num_steps_sampled: 1399000
    num_steps_trained: 1399000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1399,60987.8,1399000,-2.4955,-2.06,-3.32,249.55




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1400000
  custom_metrics: {}
  date: 2021-10-25_09-28-54
  done: false
  episode_len_mean: 249.44
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4943999999999904
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5160
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.092498120996687
          entropy_coeff: 0.009999999999999998
          kl: 0.006934486670153067
          policy_loss: 0.02086749052007993
          total_loss: 0.023917394710911644
          vf_explained_var: 0.200783371925354
          vf_loss: 0.01244040722441342
    num_agent_steps_sampled: 1400000
    num_agent_steps_trained: 1400000
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1400,61051.9,1400000,-2.4944,-2.06,-3.32,249.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1401000
  custom_metrics: {}
  date: 2021-10-25_09-29-43
  done: false
  episode_len_mean: 249.71
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4970999999999908
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5164
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.0663453035884434
          entropy_coeff: 0.009999999999999998
          kl: 0.005005828581439036
          policy_loss: 0.01889002646009127
          total_loss: 0.0223086508611838
          vf_explained_var: 0.2127327024936676
          vf_loss: 0.012974375786466732
    num_agent_steps_sampled: 1401000
    num_agent_steps_trained: 1401000
    num_steps_sampled: 1401000
    num_steps_trained: 1401000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1401,61100.5,1401000,-2.4971,-2.06,-3.32,249.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1402000
  custom_metrics: {}
  date: 2021-10-25_09-30-29
  done: false
  episode_len_mean: 250.47
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.5046999999999904
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5168
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 1.1160462617874145
          entropy_coeff: 0.009999999999999998
          kl: 0.011064239165822541
          policy_loss: 0.016025070307983294
          total_loss: 0.020092265721824435
          vf_explained_var: 0.24240678548812866
          vf_loss: 0.012779338243934844
    num_agent_steps_sampled: 1402000
    num_agent_steps_trained: 1402000
    num_steps_sampled: 1402000
    num_steps_trained: 1402000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1402,61146.2,1402000,-2.5047,-2.06,-3.32,250.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1403000
  custom_metrics: {}
  date: 2021-10-25_09-31-17
  done: false
  episode_len_mean: 250.92
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5091999999999905
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5172
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 0.9260673317644331
          entropy_coeff: 0.009999999999999998
          kl: 0.00831241522620745
          policy_loss: 0.02740436212884055
          total_loss: 0.033042251318693164
          vf_explained_var: 0.15437446534633636
          vf_loss: 0.013059172303312355
    num_agent_steps_sampled: 1403000
    num_agent_steps_trained: 1403000
    num_steps_sampled: 1403000
    num_steps_trained: 140300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1403,61194.4,1403000,-2.5092,-2.1,-3.32,250.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1404000
  custom_metrics: {}
  date: 2021-10-25_09-32-02
  done: false
  episode_len_mean: 250.79
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5078999999999905
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5176
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 0.9540983027882046
          entropy_coeff: 0.009999999999999998
          kl: 0.005057534716248016
          policy_loss: -0.028519187039799162
          total_loss: -0.02334637339744303
          vf_explained_var: 0.15520340204238892
          vf_loss: 0.013594651781022548
    num_agent_steps_sampled: 1404000
    num_agent_steps_trained: 1404000
    num_steps_sampled: 1404000
    num_steps_trained: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1404,61239.6,1404000,-2.5079,-2.1,-3.32,250.79


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1405000
  custom_metrics: {}
  date: 2021-10-25_09-32-50
  done: false
  episode_len_mean: 251.39
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5138999999999903
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5180
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2212822414457142
          cur_lr: 5.000000000000001e-05
          entropy: 0.9281611250506507
          entropy_coeff: 0.009999999999999998
          kl: 0.002986223626836843
          policy_loss: 0.005271113498343361
          total_loss: 0.010386137498749627
          vf_explained_var: 0.11548937857151031
          vf_loss: 0.01373583342259129
    num_agent_steps_sampled: 1405000
    num_agent_steps_trained: 1405000
    num_steps_sampled: 1405000
    num_steps_trained: 14050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1405,61287.6,1405000,-2.5139,-2.1,-3.32,251.39


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1406000
  custom_metrics: {}
  date: 2021-10-25_09-33-36
  done: false
  episode_len_mean: 250.78
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5077999999999903
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5184
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 0.9659476564990149
          entropy_coeff: 0.009999999999999998
          kl: 0.005747803449265534
          policy_loss: -0.057157139397329755
          total_loss: -0.052178076489104164
          vf_explained_var: 0.14771327376365662
          vf_loss: 0.014002595355527269
    num_agent_steps_sampled: 1406000
    num_agent_steps_trained: 1406000
    num_steps_sampled: 1406000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1406,61333.5,1406000,-2.5078,-2.1,-3.32,250.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1407000
  custom_metrics: {}
  date: 2021-10-25_09-34-23
  done: false
  episode_len_mean: 250.64
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5063999999999904
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 5
  episodes_total: 5189
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0457121511300405
          entropy_coeff: 0.009999999999999998
          kl: 0.007711897699940175
          policy_loss: -0.02536899008684688
          total_loss: -0.018578523728582593
          vf_explained_var: 0.19365477561950684
          vf_loss: 0.01639433673893412
    num_agent_steps_sampled: 1407000
    num_agent_steps_trained: 1407000
    num_steps_sampled: 1407000
    num_steps_trained: 1407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1407,61380.8,1407000,-2.5064,-2.1,-3.32,250.64




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1408000
  custom_metrics: {}
  date: 2021-10-25_09-35-28
  done: false
  episode_len_mean: 250.31
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5030999999999906
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5193
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0845056414604186
          entropy_coeff: 0.009999999999999998
          kl: 0.005230607516614801
          policy_loss: 0.009856003440088697
          total_loss: 0.01244954549603992
          vf_explained_var: 0.22257055342197418
          vf_loss: 0.012859883832020892
    num_agent_steps_sampled: 1408000
    num_agent_steps_trained: 1408000
    num_steps_sampled: 1408000
    num_steps_trained: 14080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1408,61445.3,1408000,-2.5031,-2.1,-3.32,250.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1409000
  custom_metrics: {}
  date: 2021-10-25_09-36-15
  done: false
  episode_len_mean: 249.88
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4987999999999904
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5197
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0459185904926724
          entropy_coeff: 0.009999999999999998
          kl: 0.010120553418625396
          policy_loss: 0.026973450349436864
          total_loss: 0.030444800357023873
          vf_explained_var: 0.17261627316474915
          vf_loss: 0.01281078736194306
    num_agent_steps_sampled: 1409000
    num_agent_steps_trained: 1409000
    num_steps_sampled: 1409000
    num_steps_trained: 14090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1409,61491.9,1409000,-2.4988,-2.1,-3.32,249.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1410000
  custom_metrics: {}
  date: 2021-10-25_09-37-00
  done: false
  episode_len_mean: 250.16
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5015999999999905
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5201
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0307599663734437
          entropy_coeff: 0.009999999999999998
          kl: 0.00768654856688471
          policy_loss: 0.04249948693646325
          total_loss: 0.0460571073823505
          vf_explained_var: 0.12473930418491364
          vf_loss: 0.013014770537200902
    num_agent_steps_sampled: 1410000
    num_agent_steps_trained: 1410000
    num_steps_sampled: 1410000
    num_steps_trained: 1410000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1410,61537.4,1410000,-2.5016,-2.12,-3.32,250.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1411000
  custom_metrics: {}
  date: 2021-10-25_09-37-47
  done: false
  episode_len_mean: 249.88
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.49879999999999
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5205
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0792372677061293
          entropy_coeff: 0.009999999999999998
          kl: 0.009057526452164824
          policy_loss: 0.026151006751590305
          total_loss: 0.028595986838142077
          vf_explained_var: 0.12429217249155045
          vf_loss: 0.01223521756215228
    num_agent_steps_sampled: 1411000
    num_agent_steps_trained: 1411000
    num_steps_sampled: 1411000
    num_steps_trained: 141100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1411,61584.4,1411000,-2.4988,-2.12,-3.32,249.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1412000
  custom_metrics: {}
  date: 2021-10-25_09-38-33
  done: false
  episode_len_mean: 249.16
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4915999999999907
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5209
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0951391458511353
          entropy_coeff: 0.009999999999999998
          kl: 0.0039226444844947155
          policy_loss: -0.014392942935228348
          total_loss: -0.011430185702111986
          vf_explained_var: 0.11856798827648163
          vf_loss: 0.013480139327132039
    num_agent_steps_sampled: 1412000
    num_agent_steps_trained: 1412000
    num_steps_sampled: 1412000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1412,61630.4,1412000,-2.4916,-2.12,-3.32,249.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1413000
  custom_metrics: {}
  date: 2021-10-25_09-39-19
  done: false
  episode_len_mean: 248.81
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.488099999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5213
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1511044171121385
          entropy_coeff: 0.009999999999999998
          kl: 0.010656221646758605
          policy_loss: -0.02300561707880762
          total_loss: -0.0205761323372523
          vf_explained_var: 0.1667526513338089
          vf_loss: 0.013351021189656523
    num_agent_steps_sampled: 1413000
    num_agent_steps_trained: 1413000
    num_steps_sampled: 1413000
    num_steps_trained: 14130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1413,61676.1,1413000,-2.4881,-2.12,-3.32,248.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1414000
  custom_metrics: {}
  date: 2021-10-25_09-40-02
  done: false
  episode_len_mean: 249.47
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4946999999999906
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5217
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.2147206412421332
          entropy_coeff: 0.009999999999999998
          kl: 0.019697736977767652
          policy_loss: 0.0010835497743553585
          total_loss: 0.003150303413470586
          vf_explained_var: 0.24649596214294434
          vf_loss: 0.013124270447426372
    num_agent_steps_sampled: 1414000
    num_agent_steps_trained: 1414000
    num_steps_sampled: 1414000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1414,61719.4,1414000,-2.4947,-2.12,-3.32,249.47




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1415000
  custom_metrics: {}
  date: 2021-10-25_09-41-08
  done: false
  episode_len_mean: 249.1
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4909999999999908
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5221
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.2504388981395298
          entropy_coeff: 0.009999999999999998
          kl: 0.015560557434835687
          policy_loss: -0.007780956228574117
          total_loss: -0.00650937921471066
          vf_explained_var: 0.26275596022605896
          vf_loss: 0.0129151476547122
    num_agent_steps_sampled: 1415000
    num_agent_steps_trained: 1415000
    num_steps_sampled: 1415000
    num_steps_trained: 1415

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1415,61784.7,1415000,-2.491,-2.12,-3.32,249.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1416000
  custom_metrics: {}
  date: 2021-10-25_09-41-54
  done: false
  episode_len_mean: 248.47
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.484699999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5225
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.219853863451216
          entropy_coeff: 0.009999999999999998
          kl: 0.011336087426243088
          policy_loss: -0.014812549700339635
          total_loss: -0.013465911315547096
          vf_explained_var: 0.23477281630039215
          vf_loss: 0.012918057478964328
    num_agent_steps_sampled: 1416000
    num_agent_steps_trained: 1416000
    num_steps_sampled: 1416000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1416,61831.1,1416000,-2.4847,-2.12,-3.32,248.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1417000
  custom_metrics: {}
  date: 2021-10-25_09-42-40
  done: false
  episode_len_mean: 248.25
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4824999999999906
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5229
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.206653396288554
          entropy_coeff: 0.009999999999999998
          kl: 0.012193238707136326
          policy_loss: -0.017285683668322033
          total_loss: -0.015671353911360104
          vf_explained_var: 0.22540849447250366
          vf_loss: 0.013006327684140867
    num_agent_steps_sampled: 1417000
    num_agent_steps_trained: 1417000
    num_steps_sampled: 1417000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1417,61877.3,1417000,-2.4825,-2.12,-3.32,248.25


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1418000
  custom_metrics: {}
  date: 2021-10-25_09-43-28
  done: false
  episode_len_mean: 248.29
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4828999999999906
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5233
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1434535344441732
          entropy_coeff: 0.009999999999999998
          kl: 0.010108506129587783
          policy_loss: -0.026983426759640376
          total_loss: -0.024725711594025295
          vf_explained_var: 0.19450931251049042
          vf_loss: 0.013133039615220493
    num_agent_steps_sampled: 1418000
    num_agent_steps_trained: 1418000
    num_steps_sampled: 1418000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1418,61924.9,1418000,-2.4829,-2.12,-3.32,248.29


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1419000
  custom_metrics: {}
  date: 2021-10-25_09-44-14
  done: false
  episode_len_mean: 247.1
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4709999999999908
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5237
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1637602673636542
          entropy_coeff: 0.009999999999999998
          kl: 0.012509904975943047
          policy_loss: -0.010438107947508494
          total_loss: -0.008476549800899293
          vf_explained_var: 0.21412554383277893
          vf_loss: 0.012907106367250284
    num_agent_steps_sampled: 1419000
    num_agent_steps_trained: 1419000
    num_steps_sampled: 1419000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1419,61971.1,1419000,-2.471,-2.12,-3.12,247.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1420000
  custom_metrics: {}
  date: 2021-10-25_09-45-01
  done: false
  episode_len_mean: 246.63
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.466299999999991
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5241
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1482794562975565
          entropy_coeff: 0.009999999999999998
          kl: 0.015169694263653945
          policy_loss: -0.05570678735772769
          total_loss: -0.05296555045578215
          vf_explained_var: 0.18904632329940796
          vf_loss: 0.01338483491498563
    num_agent_steps_sampled: 1420000
    num_agent_steps_trained: 1420000
    num_steps_sampled: 1420000
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1420,62018.3,1420000,-2.4663,-2.12,-3.12,246.63


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1421000
  custom_metrics: {}
  date: 2021-10-25_09-45-49
  done: false
  episode_len_mean: 246.1
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.460999999999991
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5245
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1501249816682604
          entropy_coeff: 0.009999999999999998
          kl: 0.009496728375219811
          policy_loss: -0.088050367600388
          total_loss: -0.08476220460401641
          vf_explained_var: 0.19986192882061005
          vf_loss: 0.014264048718743854
    num_agent_steps_sampled: 1421000
    num_agent_steps_trained: 1421000
    num_steps_sampled: 1421000
    num_steps_trained: 14210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1421,62065.8,1421000,-2.461,-2.12,-3.12,246.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1422000
  custom_metrics: {}
  date: 2021-10-25_09-46-32
  done: false
  episode_len_mean: 245.52
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.455199999999991
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5249
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1584234025743272
          entropy_coeff: 0.009999999999999998
          kl: 0.01592905147729932
          policy_loss: -0.09409373166660467
          total_loss: -0.08945089450312985
          vf_explained_var: 0.20728586614131927
          vf_loss: 0.015345866791903973
    num_agent_steps_sampled: 1422000
    num_agent_steps_trained: 1422000
    num_steps_sampled: 1422000
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1422,62108.7,1422000,-2.4552,-2.12,-3.12,245.52




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1423000
  custom_metrics: {}
  date: 2021-10-25_09-47-36
  done: false
  episode_len_mean: 245.58
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4557999999999915
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5253
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1829046289126077
          entropy_coeff: 0.009999999999999998
          kl: 0.0181230120047905
          policy_loss: -0.03694652741154035
          total_loss: -0.0354972656402323
          vf_explained_var: 0.29011476039886475
          vf_loss: 0.012275734616236555
    num_agent_steps_sampled: 1423000
    num_agent_steps_trained: 1423000
    num_steps_sampled: 1423000
    num_steps_trained: 1423

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1423,62173.2,1423000,-2.4558,-2.12,-3.12,245.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1424000
  custom_metrics: {}
  date: 2021-10-25_09-48-19
  done: false
  episode_len_mean: 245.84
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4583999999999913
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5257
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.265444164805942
          entropy_coeff: 0.009999999999999998
          kl: 0.013853264541289321
          policy_loss: -0.0931171611779266
          total_loss: -0.09224097737007671
          vf_explained_var: 0.3048861622810364
          vf_loss: 0.012764258227414555
    num_agent_steps_sampled: 1424000
    num_agent_steps_trained: 1424000
    num_steps_sampled: 1424000
    num_steps_trained: 1424

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1424,62215.4,1424000,-2.4584,-2.12,-3.12,245.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1425000
  custom_metrics: {}
  date: 2021-10-25_09-49-02
  done: false
  episode_len_mean: 246.66
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4665999999999912
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 5261
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.152639957269033
          entropy_coeff: 0.009999999999999998
          kl: 0.013121647601932556
          policy_loss: -0.001982004526588652
          total_loss: 6.294796864191691e-05
          vf_explained_var: 0.2203609198331833
          vf_loss: 0.012845458338658015
    num_agent_steps_sampled: 1425000
    num_agent_steps_trained: 1425000
    num_steps_sampled: 1425000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1425,62258.5,1425000,-2.4666,-2.14,-3.12,246.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1426000
  custom_metrics: {}
  date: 2021-10-25_09-49-45
  done: false
  episode_len_mean: 248.38
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4837999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5265
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05532056036142855
          cur_lr: 5.000000000000001e-05
          entropy: 1.2524437891112434
          entropy_coeff: 0.009999999999999998
          kl: 0.02108807162103628
          policy_loss: 0.002782690193918016
          total_loss: 0.00249043603738149
          vf_explained_var: 0.3045244514942169
          vf_loss: 0.011065580623431338
    num_agent_steps_sampled: 1426000
    num_agent_steps_trained: 1426000
    num_steps_sampled: 1426000
    num_steps_trained: 1426

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1426,62301.5,1426000,-2.4838,-2.14,-3.26,248.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1427000
  custom_metrics: {}
  date: 2021-10-25_09-50-30
  done: false
  episode_len_mean: 248.2
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4819999999999913
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5269
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 1.121446172396342
          entropy_coeff: 0.009999999999999998
          kl: 0.01003103481094464
          policy_loss: 0.019340701897939048
          total_loss: 0.020275561345948115
          vf_explained_var: 0.24177180230617523
          vf_loss: 0.011316938004973862
    num_agent_steps_sampled: 1427000
    num_agent_steps_trained: 1427000
    num_steps_sampled: 1427000
    num_steps_trained: 1427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1427,62346.5,1427000,-2.482,-2.14,-3.26,248.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1428000
  custom_metrics: {}
  date: 2021-10-25_09-51-17
  done: false
  episode_len_mean: 248.62
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.486199999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5273
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 1.072693149248759
          entropy_coeff: 0.009999999999999998
          kl: 0.009069848065862137
          policy_loss: 0.025913860814438926
          total_loss: 0.025703496568732793
          vf_explained_var: 0.3042885363101959
          vf_loss: 0.009763945284713473
    num_agent_steps_sampled: 1428000
    num_agent_steps_trained: 1428000
    num_steps_sampled: 1428000
    num_steps_trained: 1428

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1428,62393.7,1428000,-2.4862,-2.14,-3.26,248.62


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1429000
  custom_metrics: {}
  date: 2021-10-25_09-52-03
  done: false
  episode_len_mean: 248.32
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4831999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5277
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 0.9563945280181037
          entropy_coeff: 0.009999999999999998
          kl: 0.012179806773652895
          policy_loss: 0.030277702543470595
          total_loss: 0.03328650287455982
          vf_explained_var: 0.1413803994655609
          vf_loss: 0.01156205370918744
    num_agent_steps_sampled: 1429000
    num_agent_steps_trained: 1429000
    num_steps_sampled: 1429000
    num_steps_trained: 1429

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1429,62440.1,1429000,-2.4832,-2.14,-3.26,248.32




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1430000
  custom_metrics: {}
  date: 2021-10-25_09-53-06
  done: false
  episode_len_mean: 248.24
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4823999999999913
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5281
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 0.9129564139578078
          entropy_coeff: 0.009999999999999998
          kl: 0.00899694925423715
          policy_loss: 0.01748644196324878
          total_loss: 0.022419123103221257
          vf_explained_var: 0.15378761291503906
          vf_loss: 0.0133156708545155
    num_agent_steps_sampled: 1430000
    num_agent_steps_trained: 1430000
    num_steps_sampled: 1430000
    num_steps_trained: 14300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1430,62502.8,1430000,-2.4824,-2.14,-3.26,248.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1431000
  custom_metrics: {}
  date: 2021-10-25_09-53-54
  done: false
  episode_len_mean: 248.67
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.486699999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5285
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 0.9243220726648966
          entropy_coeff: 0.009999999999999998
          kl: 0.0069434285644028254
          policy_loss: 0.02065198669830958
          total_loss: 0.0254181487692727
          vf_explained_var: 0.18482975661754608
          vf_loss: 0.013433210003293223
    num_agent_steps_sampled: 1431000
    num_agent_steps_trained: 1431000
    num_steps_sampled: 1431000
    num_steps_trained: 1431

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1431,62550.8,1431000,-2.4867,-2.14,-3.26,248.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1432000
  custom_metrics: {}
  date: 2021-10-25_09-54-42
  done: false
  episode_len_mean: 248.67
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4866999999999906
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5289
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08298084054214283
          cur_lr: 5.000000000000001e-05
          entropy: 0.8789747423595853
          entropy_coeff: 0.009999999999999998
          kl: 0.003586106916250426
          policy_loss: 0.032329530268907544
          total_loss: 0.03642583265900612
          vf_explained_var: 0.1663864403963089
          vf_loss: 0.0125884715674652
    num_agent_steps_sampled: 1432000
    num_agent_steps_trained: 1432000
    num_steps_sampled: 1432000
    num_steps_trained: 14320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1432,62598.6,1432000,-2.4867,-2.14,-3.26,248.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1433000
  custom_metrics: {}
  date: 2021-10-25_09-55-28
  done: false
  episode_len_mean: 249.12
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4911999999999908
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5293
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9220494627952576
          entropy_coeff: 0.009999999999999998
          kl: 0.006134957482890455
          policy_loss: -0.008759909744064013
          total_loss: -0.004417224352558454
          vf_explained_var: 0.20607967674732208
          vf_loss: 0.01330863686485423
    num_agent_steps_sampled: 1433000
    num_agent_steps_trained: 1433000
    num_steps_sampled: 1433000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1433,62644.1,1433000,-2.4912,-2.14,-3.26,249.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1434000
  custom_metrics: {}
  date: 2021-10-25_09-56-15
  done: false
  episode_len_mean: 249.19
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.491899999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5297
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8823112779193454
          entropy_coeff: 0.009999999999999998
          kl: 0.005658121812942972
          policy_loss: -0.024234665267997318
          total_loss: -0.019629409660895665
          vf_explained_var: 0.17336741089820862
          vf_loss: 0.013193611397097509
    num_agent_steps_sampled: 1434000
    num_agent_steps_trained: 1434000
    num_steps_sampled: 1434000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1434,62691.7,1434000,-2.4919,-2.14,-3.26,249.19


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1435000
  custom_metrics: {}
  date: 2021-10-25_09-57-03
  done: false
  episode_len_mean: 249.21
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.492099999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5301
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9454094429810842
          entropy_coeff: 0.009999999999999998
          kl: 0.009247489315472673
          policy_loss: -0.07464048605826167
          total_loss: -0.06954751287897427
          vf_explained_var: 0.2285957783460617
          vf_loss: 0.014163384389960103
    num_agent_steps_sampled: 1435000
    num_agent_steps_trained: 1435000
    num_steps_sampled: 1435000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1435,62739.2,1435000,-2.4921,-2.14,-3.26,249.21


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1436000
  custom_metrics: {}
  date: 2021-10-25_09-57-51
  done: false
  episode_len_mean: 248.99
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4898999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 5
  episodes_total: 5306
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9451108747058444
          entropy_coeff: 0.009999999999999998
          kl: 0.007318457082607842
          policy_loss: -0.022719444582859676
          total_loss: -0.016258862945768567
          vf_explained_var: 0.25934022665023804
          vf_loss: 0.015608044310162465
    num_agent_steps_sampled: 1436000
    num_agent_steps_trained: 1436000
    num_steps_sampled: 1436000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1436,62787.1,1436000,-2.4899,-2.14,-3.26,248.99




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1437000
  custom_metrics: {}
  date: 2021-10-25_09-58-52
  done: false
  episode_len_mean: 249.01
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4900999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5310
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.1068538943926494
          entropy_coeff: 0.009999999999999998
          kl: 0.011900014844647804
          policy_loss: -0.0026072606444358824
          total_loss: -0.0008865397837426927
          vf_explained_var: 0.3225947916507721
          vf_loss: 0.012295525728000535
    num_agent_steps_sampled: 1437000
    num_agent_steps_trained: 1437000
    num_steps_sampled: 1437000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1437,62849,1437000,-2.4901,-2.14,-3.26,249.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1438000
  custom_metrics: {}
  date: 2021-10-25_09-59-39
  done: false
  episode_len_mean: 249.04
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4903999999999904
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5314
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.1320805337693955
          entropy_coeff: 0.009999999999999998
          kl: 0.00908853084525734
          policy_loss: -0.007793863034910626
          total_loss: -0.00661377501156595
          vf_explained_var: 0.34543517231941223
          vf_loss: 0.012123807736982902
    num_agent_steps_sampled: 1438000
    num_agent_steps_trained: 1438000
    num_steps_sampled: 1438000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1438,62895.6,1438000,-2.4904,-2.14,-3.26,249.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1439000
  custom_metrics: {}
  date: 2021-10-25_10-00-25
  done: false
  episode_len_mean: 248.91
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.489099999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 5317
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.0162529740068647
          entropy_coeff: 0.009999999999999998
          kl: 0.011988392602602952
          policy_loss: -0.10948546297020383
          total_loss: -0.10677309234937032
          vf_explained_var: 0.2809649407863617
          vf_loss: 0.012377498899069098
    num_agent_steps_sampled: 1439000
    num_agent_steps_trained: 1439000
    num_steps_sampled: 1439000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1439,62941.1,1439000,-2.4891,-2.14,-3.26,248.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1440000
  custom_metrics: {}
  date: 2021-10-25_10-01-11
  done: false
  episode_len_mean: 249.14
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4913999999999903
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5321
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9454616612858242
          entropy_coeff: 0.009999999999999998
          kl: 0.009136882014016755
          policy_loss: -0.11467530421084828
          total_loss: -0.1097699479924308
          vf_explained_var: 0.22819308936595917
          vf_loss: 0.013980877658145294
    num_agent_steps_sampled: 1440000
    num_agent_steps_trained: 1440000
    num_steps_sampled: 1440000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1440,62987.1,1440000,-2.4914,-2.14,-3.26,249.14


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1441000
  custom_metrics: {}
  date: 2021-10-25_10-01-56
  done: false
  episode_len_mean: 249.08
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4907999999999912
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 5
  episodes_total: 5326
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8749875048796336
          entropy_coeff: 0.009999999999999998
          kl: 0.009269631078494732
          policy_loss: -0.017778220938311683
          total_loss: -0.013521092053916718
          vf_explained_var: 0.24479365348815918
          vf_loss: 0.012622402225517564
    num_agent_steps_sampled: 1441000
    num_agent_steps_trained: 1441000
    num_steps_sampled: 1441000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1441,63032.1,1441000,-2.4908,-2.14,-3.26,249.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1442000
  custom_metrics: {}
  date: 2021-10-25_10-02-43
  done: false
  episode_len_mean: 248.92
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.489199999999991
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5330
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8567678299215105
          entropy_coeff: 0.009999999999999998
          kl: 0.013094640659219357
          policy_loss: 0.0022924034959740107
          total_loss: 0.007137970957491133
          vf_explained_var: 0.14091700315475464
          vf_loss: 0.012869943357590173
    num_agent_steps_sampled: 1442000
    num_agent_steps_trained: 1442000
    num_steps_sampled: 1442000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1442,63079.2,1442000,-2.4892,-2.14,-3.26,248.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1443000
  custom_metrics: {}
  date: 2021-10-25_10-03-30
  done: false
  episode_len_mean: 248.94
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4893999999999905
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5334
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8161631643772125
          entropy_coeff: 0.009999999999999998
          kl: 0.00924404538802719
          policy_loss: 0.022330251998371548
          total_loss: 0.026989300631814532
          vf_explained_var: 0.10916180163621902
          vf_loss: 0.012437139855076869
    num_agent_steps_sampled: 1443000
    num_agent_steps_trained: 1443000
    num_steps_sampled: 1443000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1443,63125.9,1443000,-2.4894,-2.14,-3.26,248.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1444000
  custom_metrics: {}
  date: 2021-10-25_10-04-16
  done: false
  episode_len_mean: 248.84
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4883999999999906
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5338
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.7458900491396586
          entropy_coeff: 0.009999999999999998
          kl: 0.006248130214587554
          policy_loss: 0.018009845746888055
          total_loss: 0.023822854583462078
          vf_explained_var: 0.12253457307815552
          vf_loss: 0.013012671894911263
    num_agent_steps_sampled: 1444000
    num_agent_steps_trained: 1444000
    num_steps_sampled: 1444000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1444,63172,1444000,-2.4884,-2.14,-3.26,248.84




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1445000
  custom_metrics: {}
  date: 2021-10-25_10-05-19
  done: false
  episode_len_mean: 248.92
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4891999999999905
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5342
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.782039694653617
          entropy_coeff: 0.009999999999999998
          kl: 0.016562567020644972
          policy_loss: 0.02055411371919844
          total_loss: 0.026324282503790326
          vf_explained_var: 0.10907240957021713
          vf_loss: 0.012903378821081585
    num_agent_steps_sampled: 1445000
    num_agent_steps_trained: 1445000
    num_steps_sampled: 1445000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1445,63235.1,1445000,-2.4892,-2.14,-3.26,248.92


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1446000
  custom_metrics: {}
  date: 2021-10-25_10-06-05
  done: false
  episode_len_mean: 249.15
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4914999999999905
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5346
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.856888335943222
          entropy_coeff: 0.009999999999999998
          kl: 0.012674024221667244
          policy_loss: 0.02179835910598437
          total_loss: 0.026344959437847138
          vf_explained_var: 0.12828528881072998
          vf_loss: 0.012589636010428269
    num_agent_steps_sampled: 1446000
    num_agent_steps_trained: 1446000
    num_steps_sampled: 1446000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1446,63281.3,1446000,-2.4915,-2.14,-3.26,249.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1447000
  custom_metrics: {}
  date: 2021-10-25_10-06-50
  done: false
  episode_len_mean: 249.45
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4944999999999906
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5350
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8568311293919881
          entropy_coeff: 0.009999999999999998
          kl: 0.010806630244848807
          policy_loss: 0.002891940126816432
          total_loss: 0.008331477228138182
          vf_explained_var: 0.17458949983119965
          vf_loss: 0.013559477031230926
    num_agent_steps_sampled: 1447000
    num_agent_steps_trained: 1447000
    num_steps_sampled: 1447000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1447,63326.6,1447000,-2.4945,-2.14,-3.26,249.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1448000
  custom_metrics: {}
  date: 2021-10-25_10-07-35
  done: false
  episode_len_mean: 249.33
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4932999999999907
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5354
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8529600825574662
          entropy_coeff: 0.009999999999999998
          kl: 0.006872746713976172
          policy_loss: 0.01297578008638488
          total_loss: 0.017019282860888375
          vf_explained_var: 0.18293462693691254
          vf_loss: 0.012287949946605497
    num_agent_steps_sampled: 1448000
    num_agent_steps_trained: 1448000
    num_steps_sampled: 1448000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1448,63371.6,1448000,-2.4933,-2.14,-3.26,249.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1449000
  custom_metrics: {}
  date: 2021-10-25_10-08-21
  done: false
  episode_len_mean: 248.77
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4876999999999905
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 5358
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.849607092804379
          entropy_coeff: 0.009999999999999998
          kl: 0.006765747518048531
          policy_loss: 0.021912104553646512
          total_loss: 0.025756022251314587
          vf_explained_var: 0.21785394847393036
          vf_loss: 0.012059274791843362
    num_agent_steps_sampled: 1449000
    num_agent_steps_trained: 1449000
    num_steps_sampled: 1449000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1449,63417.1,1449000,-2.4877,-2.14,-3.26,248.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1450000
  custom_metrics: {}
  date: 2021-10-25_10-09-06
  done: false
  episode_len_mean: 248.05
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.480499999999991
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 5362
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.937316522995631
          entropy_coeff: 0.009999999999999998
          kl: 0.010570141497618776
          policy_loss: -0.008616904997163348
          total_loss: -0.003494036859936184
          vf_explained_var: 0.22067710757255554
          vf_loss: 0.014057472472389539
    num_agent_steps_sampled: 1450000
    num_agent_steps_trained: 1450000
    num_steps_sampled: 1450000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1450,63462.3,1450000,-2.4805,-2.14,-2.84,248.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1451000
  custom_metrics: {}
  date: 2021-10-25_10-09-53
  done: false
  episode_len_mean: 247.58
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.475799999999991
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 5366
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.924947769774331
          entropy_coeff: 0.009999999999999998
          kl: 0.008532250374073815
          policy_loss: 0.02162019411722819
          total_loss: 0.02430207340253724
          vf_explained_var: 0.27506232261657715
          vf_loss: 0.011577348462823364
    num_agent_steps_sampled: 1451000
    num_agent_steps_trained: 1451000
    num_steps_sampled: 1451000
    num_steps_trained: 14510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1451,63509.1,1451000,-2.4758,-2.14,-2.71,247.58




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1452000
  custom_metrics: {}
  date: 2021-10-25_10-10-57
  done: false
  episode_len_mean: 247.45
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.4744999999999915
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 5370
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.924979074133767
          entropy_coeff: 0.009999999999999998
          kl: 0.009197173684147665
          policy_loss: 0.018486793546213045
          total_loss: 0.023350168392062186
          vf_explained_var: 0.2468544840812683
          vf_loss: 0.013731574991510974
    num_agent_steps_sampled: 1452000
    num_agent_steps_trained: 1452000
    num_steps_sampled: 1452000
    num_steps_trained: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1452,63573.2,1452000,-2.4745,-2.14,-2.71,247.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1453000
  custom_metrics: {}
  date: 2021-10-25_10-11-44
  done: false
  episode_len_mean: 247.56
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.475599999999991
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 5374
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9177509380711449
          entropy_coeff: 0.009999999999999998
          kl: 0.008423693915278186
          policy_loss: -0.010555975139141083
          total_loss: -0.005501387930578656
          vf_explained_var: 0.24959172308444977
          vf_loss: 0.013882592724015316
    num_agent_steps_sampled: 1453000
    num_agent_steps_trained: 1453000
    num_steps_sampled: 1453000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1453,63620,1453000,-2.4756,-2.14,-2.71,247.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1454000
  custom_metrics: {}
  date: 2021-10-25_10-12-30
  done: false
  episode_len_mean: 247.88
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.478799999999991
  episode_reward_min: -2.869999999999983
  episodes_this_iter: 4
  episodes_total: 5378
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8697864015897115
          entropy_coeff: 0.009999999999999998
          kl: 0.010725508685453179
          policy_loss: 0.018017716084917385
          total_loss: 0.023218248867326312
          vf_explained_var: 0.3279259204864502
          vf_loss: 0.013453389849099847
    num_agent_steps_sampled: 1454000
    num_agent_steps_trained: 1454000
    num_steps_sampled: 1454000
    num_steps_trained: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1454,63665.6,1454000,-2.4788,-2.14,-2.87,247.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1455000
  custom_metrics: {}
  date: 2021-10-25_10-13-16
  done: false
  episode_len_mean: 248.38
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.4837999999999907
  episode_reward_min: -2.869999999999983
  episodes_this_iter: 4
  episodes_total: 5382
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9370059384240045
          entropy_coeff: 0.009999999999999998
          kl: 0.010079178388619622
          policy_loss: -0.0033478305571609072
          total_loss: 0.0005575656063026852
          vf_explained_var: 0.36203575134277344
          vf_loss: 0.012857264207883013
    num_agent_steps_sampled: 1455000
    num_agent_steps_trained: 1455000
    num_steps_sampled: 1455000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1455,63711.9,1455000,-2.4838,-2.2,-2.87,248.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1456000
  custom_metrics: {}
  date: 2021-10-25_10-14-02
  done: false
  episode_len_mean: 248.52
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.4851999999999905
  episode_reward_min: -2.869999999999983
  episodes_this_iter: 3
  episodes_total: 5385
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9632129026783838
          entropy_coeff: 0.009999999999999998
          kl: 0.009641515042878347
          policy_loss: -0.10510962357123693
          total_loss: -0.1018697015941143
          vf_explained_var: 0.41458237171173096
          vf_loss: 0.012472018278721306
    num_agent_steps_sampled: 1456000
    num_agent_steps_trained: 1456000
    num_steps_sampled: 1456000
    num_steps_trained: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1456,63758.3,1456000,-2.4852,-2.2,-2.87,248.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1457000
  custom_metrics: {}
  date: 2021-10-25_10-14-45
  done: false
  episode_len_mean: 249.33
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.4932999999999907
  episode_reward_min: -2.869999999999983
  episodes_this_iter: 4
  episodes_total: 5389
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.9664079573419359
          entropy_coeff: 0.009999999999999998
          kl: 0.013384806039411699
          policy_loss: -0.005426249528924624
          total_loss: -0.0031117937217156094
          vf_explained_var: 0.4561122953891754
          vf_loss: 0.01142319402553969
    num_agent_steps_sampled: 1457000
    num_agent_steps_trained: 1457000
    num_steps_sampled: 1457000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1457,63800.9,1457000,-2.4933,-2.2,-2.87,249.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1458000
  custom_metrics: {}
  date: 2021-10-25_10-15-27
  done: false
  episode_len_mean: 250.6
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5059999999999905
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 5393
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.1369001355436112
          entropy_coeff: 0.009999999999999998
          kl: 0.013380269243520518
          policy_loss: 0.03257958110835817
          total_loss: 0.03124248008761141
          vf_explained_var: 0.5370270013809204
          vf_loss: 0.009476742495058312
    num_agent_steps_sampled: 1458000
    num_agent_steps_trained: 1458000
    num_steps_sampled: 1458000
    num_steps_trained: 14580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1458,63843.3,1458000,-2.506,-2.2,-2.93,250.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1459000
  custom_metrics: {}
  date: 2021-10-25_10-16-14
  done: false
  episode_len_mean: 251.39
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5138999999999903
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 5397
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.0053854352898068
          entropy_coeff: 0.009999999999999998
          kl: 0.012105517861439985
          policy_loss: -0.017303156438801023
          total_loss: -0.014872736069891188
          vf_explained_var: 0.46851447224617004
          vf_loss: 0.01198201347142458
    num_agent_steps_sampled: 1459000
    num_agent_steps_trained: 1459000
    num_steps_sampled: 1459000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1459,63889.5,1459000,-2.5139,-2.2,-2.93,251.39




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1460000
  custom_metrics: {}
  date: 2021-10-25_10-17-18
  done: false
  episode_len_mean: 251.7
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.51699999999999
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 5401
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 1.0205650508403779
          entropy_coeff: 0.009999999999999998
          kl: 0.01266939228954662
          policy_loss: -0.02382715323732959
          total_loss: -0.021992678898904057
          vf_explained_var: 0.5124222040176392
          vf_loss: 0.011514467145833705
    num_agent_steps_sampled: 1460000
    num_agent_steps_trained: 1460000
    num_steps_sampled: 1460000
    num_steps_trained: 14600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1460,63953.3,1460000,-2.517,-2.2,-2.93,251.7


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1461000
  custom_metrics: {}
  date: 2021-10-25_10-18-04
  done: false
  episode_len_mean: 252.01
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5200999999999905
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 5405
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8299471186267005
          entropy_coeff: 0.009999999999999998
          kl: 0.007252038713831885
          policy_loss: -0.0045309344927469885
          total_loss: -0.0009225289854738447
          vf_explained_var: 0.4706454277038574
          vf_loss: 0.011606986706869469
    num_agent_steps_sampled: 1461000
    num_agent_steps_trained: 1461000
    num_steps_sampled: 1461000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1461,64000.2,1461000,-2.5201,-2.2,-2.93,252.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1462000
  custom_metrics: {}
  date: 2021-10-25_10-18-49
  done: false
  episode_len_mean: 252.45
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5244999999999895
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 3
  episodes_total: 5408
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.8112737801339891
          entropy_coeff: 0.009999999999999998
          kl: 0.00906887937059921
          policy_loss: -0.08444162292612924
          total_loss: -0.08047622533308135
          vf_explained_var: 0.4694976806640625
          vf_loss: 0.01170186527694265
    num_agent_steps_sampled: 1462000
    num_agent_steps_trained: 1462000
    num_steps_sampled: 1462000
    num_steps_trained: 1462

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1462,64045.1,1462000,-2.5245,-2.2,-2.93,252.45


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1463000
  custom_metrics: {}
  date: 2021-10-25_10-19-35
  done: false
  episode_len_mean: 252.75
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52749999999999
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5412
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.659628466102812
          entropy_coeff: 0.009999999999999998
          kl: 0.00910057156270771
          policy_loss: -0.11457566602362526
          total_loss: -0.10600511233011882
          vf_explained_var: 0.4404682517051697
          vf_loss: 0.014789253475868867
    num_agent_steps_sampled: 1463000
    num_agent_steps_trained: 1463000
    num_steps_sampled: 1463000
    num_steps_trained: 146300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1463,64091.1,1463000,-2.5275,-2.2,-2.98,252.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1464000
  custom_metrics: {}
  date: 2021-10-25_10-20-23
  done: false
  episode_len_mean: 252.1
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52099999999999
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 5
  episodes_total: 5417
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.5810312820805443
          entropy_coeff: 0.009999999999999998
          kl: 0.008386509251124876
          policy_loss: -0.030771939787599775
          total_loss: -0.023013259801599715
          vf_explained_var: 0.358415424823761
          vf_loss: 0.013221033797081973
    num_agent_steps_sampled: 1464000
    num_agent_steps_trained: 1464000
    num_steps_sampled: 1464000
    num_steps_trained: 1464

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1464,64138.6,1464000,-2.521,-2.2,-2.98,252.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1465000
  custom_metrics: {}
  date: 2021-10-25_10-21-11
  done: false
  episode_len_mean: 251.53
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5152999999999897
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5421
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041490420271071415
          cur_lr: 5.000000000000001e-05
          entropy: 0.6447578883833356
          entropy_coeff: 0.009999999999999998
          kl: 0.0724758284831831
          policy_loss: 0.01791162581907378
          total_loss: 0.025294941829310524
          vf_explained_var: 0.24510574340820312
          vf_loss: 0.010823844963063796
    num_agent_steps_sampled: 1465000
    num_agent_steps_trained: 1465000
    num_steps_sampled: 1465000
    num_steps_trained: 1465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1465,64186.2,1465000,-2.5153,-2.2,-2.98,251.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1466000
  custom_metrics: {}
  date: 2021-10-25_10-21-54
  done: false
  episode_len_mean: 252.01
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52009999999999
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5425
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.7533664597405327
          entropy_coeff: 0.009999999999999998
          kl: 0.009780636708646758
          policy_loss: 0.026718796127372317
          total_loss: 0.03089385504523913
          vf_explained_var: 0.17176851630210876
          vf_loss: 0.011100018407321638
    num_agent_steps_sampled: 1466000
    num_agent_steps_trained: 1466000
    num_steps_sampled: 1466000
    num_steps_trained: 14660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1466,64230.1,1466000,-2.5201,-2.2,-2.98,252.01


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1467000
  custom_metrics: {}
  date: 2021-10-25_10-22-38
  done: false
  episode_len_mean: 252.75
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52749999999999
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5429
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.7992147207260132
          entropy_coeff: 0.009999999999999998
          kl: 0.005788133333792207
          policy_loss: -0.004319549476106962
          total_loss: 6.427516539891561e-05
          vf_explained_var: 0.18290232121944427
          vf_loss: 0.012015741339160337
    num_agent_steps_sampled: 1467000
    num_agent_steps_trained: 1467000
    num_steps_sampled: 1467000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1467,64273.4,1467000,-2.5275,-2.2,-2.98,252.75




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1468000
  custom_metrics: {}
  date: 2021-10-25_10-23-41
  done: false
  episode_len_mean: 252.68
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52679999999999
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5433
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9198510203096602
          entropy_coeff: 0.009999999999999998
          kl: 0.015902654748522593
          policy_loss: 0.00352387817369567
          total_loss: 0.007325779232713912
          vf_explained_var: 0.1368589550256729
          vf_loss: 0.012010704612152444
    num_agent_steps_sampled: 1468000
    num_agent_steps_trained: 1468000
    num_steps_sampled: 1468000
    num_steps_trained: 146800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1468,64336.1,1468000,-2.5268,-2.2,-2.98,252.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1469000
  custom_metrics: {}
  date: 2021-10-25_10-24-22
  done: false
  episode_len_mean: 253.52
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5351999999999895
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 3
  episodes_total: 5436
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.10616547399097
          entropy_coeff: 0.009999999999999998
          kl: 0.011032619296269926
          policy_loss: 0.011013177699512905
          total_loss: 0.010140770177046458
          vf_explained_var: 0.1357349455356598
          vf_loss: 0.00950262246058426
    num_agent_steps_sampled: 1469000
    num_agent_steps_trained: 1469000
    num_steps_sampled: 1469000
    num_steps_trained: 146900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1469,64378,1469000,-2.5352,-2.2,-2.98,253.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1470000
  custom_metrics: {}
  date: 2021-10-25_10-25-04
  done: false
  episode_len_mean: 255.08
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5507999999999895
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5440
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.1634522954622903
          entropy_coeff: 0.009999999999999998
          kl: 0.008929653735217534
          policy_loss: 0.013488942136367163
          total_loss: 0.0160935763683584
          vf_explained_var: 0.11248699575662613
          vf_loss: 0.013683412679367596
    num_agent_steps_sampled: 1470000
    num_agent_steps_trained: 1470000
    num_steps_sampled: 1470000
    num_steps_trained: 1470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1470,64419,1470000,-2.5508,-2.24,-2.98,255.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1471000
  custom_metrics: {}
  date: 2021-10-25_10-25-46
  done: false
  episode_len_mean: 255.91
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5590999999999893
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 3
  episodes_total: 5443
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9896216260062324
          entropy_coeff: 0.009999999999999998
          kl: 0.01120196157071089
          policy_loss: -0.08583354949951172
          total_loss: -0.08277494957049687
          vf_explained_var: 0.07343843579292297
          vf_loss: 0.012257656330863636
    num_agent_steps_sampled: 1471000
    num_agent_steps_trained: 1471000
    num_steps_sampled: 1471000
    num_steps_trained: 147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1471,64461.5,1471000,-2.5591,-2.24,-2.98,255.91


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1472000
  custom_metrics: {}
  date: 2021-10-25_10-26-29
  done: false
  episode_len_mean: 256.53
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.565299999999989
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5447
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9625237637095981
          entropy_coeff: 0.009999999999999998
          kl: 0.008833656070642586
          policy_loss: 0.006616428991158803
          total_loss: 0.01206781996621026
          vf_explained_var: 0.08678995072841644
          vf_loss: 0.0145268601572348
    num_agent_steps_sampled: 1472000
    num_agent_steps_trained: 1472000
    num_steps_sampled: 1472000
    num_steps_trained: 147200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1472,64504.1,1472000,-2.5653,-2.24,-2.98,256.53


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1473000
  custom_metrics: {}
  date: 2021-10-25_10-27-10
  done: false
  episode_len_mean: 257.1
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.570999999999989
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 4
  episodes_total: 5451
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.065947261121538
          entropy_coeff: 0.009999999999999998
          kl: 0.012077263869759254
          policy_loss: 0.008889906439516279
          total_loss: 0.013071499806311395
          vf_explained_var: 0.1510745733976364
          vf_loss: 0.014089430009739267
    num_agent_steps_sampled: 1473000
    num_agent_steps_trained: 1473000
    num_steps_sampled: 1473000
    num_steps_trained: 147300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1473,64545.9,1473000,-2.571,-2.24,-2.98,257.1


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1474000
  custom_metrics: {}
  date: 2021-10-25_10-27-54
  done: false
  episode_len_mean: 257.81
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5780999999999885
  episode_reward_min: -2.9799999999999804
  episodes_this_iter: 3
  episodes_total: 5454
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9848233229584165
          entropy_coeff: 0.009999999999999998
          kl: 0.016760782012623945
          policy_loss: -0.1188434135582712
          total_loss: -0.1137473452422354
          vf_explained_var: 0.20269976556301117
          vf_loss: 0.013901184840748707
    num_agent_steps_sampled: 1474000
    num_agent_steps_trained: 1474000
    num_steps_sampled: 1474000
    num_steps_trained: 1474

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1474,64589.1,1474000,-2.5781,-2.24,-2.98,257.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1475000
  custom_metrics: {}
  date: 2021-10-25_10-28-35
  done: false
  episode_len_mean: 259.04
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5903999999999883
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5458
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.1605709056059519
          entropy_coeff: 0.009999999999999998
          kl: 0.015135045834252498
          policy_loss: 0.010000771284103394
          total_loss: 0.012369964768489202
          vf_explained_var: 0.2541143596172333
          vf_loss: 0.01303296649000711
    num_agent_steps_sampled: 1475000
    num_agent_steps_trained: 1475000
    num_steps_sampled: 1475000
    num_steps_trained: 1475

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1475,64630.4,1475000,-2.5904,-2.24,-3.02,259.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1476000
  custom_metrics: {}
  date: 2021-10-25_10-29-37
  done: false
  episode_len_mean: 259.13
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5912999999999884
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5462
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.0713089850213793
          entropy_coeff: 0.009999999999999998
          kl: 0.008328268044520195
          policy_loss: 0.030854822943607967
          total_loss: 0.033829118228620954
          vf_explained_var: 0.24278613924980164
          vf_loss: 0.013169071750922336
    num_agent_steps_sampled: 1476000
    num_agent_steps_trained: 1476000
    num_steps_sampled: 1476000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1476,64692,1476000,-2.5913,-2.24,-3.02,259.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1477000
  custom_metrics: {}
  date: 2021-10-25_10-30-21
  done: false
  episode_len_mean: 259.66
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.5965999999999885
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5466
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.1165955675972832
          entropy_coeff: 0.009999999999999998
          kl: 0.010707700405604273
          policy_loss: 0.027392448236544926
          total_loss: 0.02797770909965038
          vf_explained_var: 0.3330237567424774
          vf_loss: 0.011084815663182073
    num_agent_steps_sampled: 1477000
    num_agent_steps_trained: 1477000
    num_steps_sampled: 1477000
    num_steps_trained: 1477

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1477,64736.5,1477000,-2.5966,-2.24,-3.02,259.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1478000
  custom_metrics: {}
  date: 2021-10-25_10-31-06
  done: false
  episode_len_mean: 260.3
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.602999999999988
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 3
  episodes_total: 5469
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9500622365209791
          entropy_coeff: 0.009999999999999998
          kl: 0.009109856121449253
          policy_loss: -0.08017276600003242
          total_loss: -0.07648615290721257
          vf_explained_var: 0.28304994106292725
          vf_loss: 0.0126202753227618
    num_agent_steps_sampled: 1478000
    num_agent_steps_trained: 1478000
    num_steps_sampled: 1478000
    num_steps_trained: 147800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1478,64781.4,1478000,-2.603,-2.24,-3.02,260.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1479000
  custom_metrics: {}
  date: 2021-10-25_10-31-52
  done: false
  episode_len_mean: 260.95
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.609499999999988
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5473
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9404513855775197
          entropy_coeff: 0.009999999999999998
          kl: 0.009622061486088443
          policy_loss: -0.01534676460756196
          total_loss: -0.010478788448704614
          vf_explained_var: 0.2719266414642334
          vf_loss: 0.013673656920178068
    num_agent_steps_sampled: 1479000
    num_agent_steps_trained: 1479000
    num_steps_sampled: 1479000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1479,64826.8,1479000,-2.6095,-2.28,-3.02,260.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1480000
  custom_metrics: {}
  date: 2021-10-25_10-32-38
  done: false
  episode_len_mean: 261.17
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6116999999999875
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5477
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9356641862127516
          entropy_coeff: 0.009999999999999998
          kl: 0.0065262055887877245
          policy_loss: -0.03223115247156885
          total_loss: -0.0277050471968121
          vf_explained_var: 0.29235583543777466
          vf_loss: 0.01347658500696222
    num_agent_steps_sampled: 1480000
    num_agent_steps_trained: 1480000
    num_steps_sampled: 1480000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1480,64873.2,1480000,-2.6117,-2.28,-3.02,261.17


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1481000
  custom_metrics: {}
  date: 2021-10-25_10-33-22
  done: false
  episode_len_mean: 261.12
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.611199999999988
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5481
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9508970114919875
          entropy_coeff: 0.009999999999999998
          kl: 0.006495829240045875
          policy_loss: 0.03691519233915541
          total_loss: 0.03838822642962138
          vf_explained_var: 0.3148188292980194
          vf_loss: 0.010577729838486347
    num_agent_steps_sampled: 1481000
    num_agent_steps_trained: 1481000
    num_steps_sampled: 1481000
    num_steps_trained: 14810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1481,64916.8,1481000,-2.6112,-2.28,-3.02,261.12


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1482000
  custom_metrics: {}
  date: 2021-10-25_10-34-06
  done: false
  episode_len_mean: 261.37
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6136999999999886
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5485
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9170907272232903
          entropy_coeff: 0.009999999999999998
          kl: 0.009695182947952692
          policy_loss: 0.034281191147036025
          total_loss: 0.03925218449698554
          vf_explained_var: 0.17460069060325623
          vf_loss: 0.013538515961004628
    num_agent_steps_sampled: 1482000
    num_agent_steps_trained: 1482000
    num_steps_sampled: 1482000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1482,64961.4,1482000,-2.6137,-2.28,-3.02,261.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1483000
  custom_metrics: {}
  date: 2021-10-25_10-34-50
  done: false
  episode_len_mean: 261.76
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6175999999999884
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5489
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.0107112526893616
          entropy_coeff: 0.009999999999999998
          kl: 0.01264880072770451
          policy_loss: 0.020562475836939282
          total_loss: 0.023547485967477164
          vf_explained_var: 0.18623583018779755
          vf_loss: 0.012304918964703877
    num_agent_steps_sampled: 1483000
    num_agent_steps_trained: 1483000
    num_steps_sampled: 1483000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1483,65005.4,1483000,-2.6176,-2.28,-3.02,261.76




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1484000
  custom_metrics: {}
  date: 2021-10-25_10-35-52
  done: false
  episode_len_mean: 260.35
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6034999999999884
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5493
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 0.9473797970347935
          entropy_coeff: 0.009999999999999998
          kl: 0.009134506767104157
          policy_loss: 0.011246183928516175
          total_loss: 0.016743399699529014
          vf_explained_var: 0.1766503006219864
          vf_loss: 0.014402522374358443
    num_agent_steps_sampled: 1484000
    num_agent_steps_trained: 1484000
    num_steps_sampled: 1484000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1484,65067.1,1484000,-2.6035,-2.19,-3.02,260.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1485000
  custom_metrics: {}
  date: 2021-10-25_10-36-38
  done: false
  episode_len_mean: 260.48
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6047999999999885
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 5497
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.0962942679723104
          entropy_coeff: 0.009999999999999998
          kl: 0.010476268461522172
          policy_loss: -0.0074111125121514
          total_loss: -0.00408169718252288
          vf_explained_var: 0.2889237105846405
          vf_loss: 0.013640357346998321
    num_agent_steps_sampled: 1485000
    num_agent_steps_trained: 1485000
    num_steps_sampled: 1485000
    num_steps_trained: 148

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1485,65112.9,1485000,-2.6048,-2.19,-3.02,260.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1486000
  custom_metrics: {}
  date: 2021-10-25_10-37-22
  done: false
  episode_len_mean: 261.08
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.610799999999988
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 3
  episodes_total: 5500
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.0278546233971915
          entropy_coeff: 0.009999999999999998
          kl: 0.008035675642844147
          policy_loss: -0.0010782254652844535
          total_loss: -0.0011668315364254844
          vf_explained_var: 0.33352741599082947
          vf_loss: 0.009689836447230643
    num_agent_steps_sampled: 1486000
    num_agent_steps_trained: 1486000
    num_steps_sampled: 1486000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1486,65157.1,1486000,-2.6108,-2.19,-3.02,261.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1487000
  custom_metrics: {}
  date: 2021-10-25_10-38-03
  done: false
  episode_len_mean: 262.15
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6214999999999886
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 5504
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06223563040660714
          cur_lr: 5.000000000000001e-05
          entropy: 1.5086557189623515
          entropy_coeff: 0.009999999999999998
          kl: 0.029232044620422462
          policy_loss: -0.011296390121181806
          total_loss: -0.01234089413450824
          vf_explained_var: 0.5128421187400818
          vf_loss: 0.012222777275989452
    num_agent_steps_sampled: 1487000
    num_agent_steps_trained: 1487000
    num_steps_sampled: 1487000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1487,65198.2,1487000,-2.6215,-2.19,-3.33,262.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1488000
  custom_metrics: {}
  date: 2021-10-25_10-38-43
  done: false
  episode_len_mean: 263.44
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6343999999999874
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 5507
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09335344560991071
          cur_lr: 5.000000000000001e-05
          entropy: 1.3690788560443454
          entropy_coeff: 0.009999999999999998
          kl: 0.014852649399988701
          policy_loss: 0.01864876366323895
          total_loss: 0.014628936019208696
          vf_explained_var: 0.6283168792724609
          vf_loss: 0.008284419317108889
    num_agent_steps_sampled: 1488000
    num_agent_steps_trained: 1488000
    num_steps_sampled: 1488000
    num_steps_trained: 1488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1488,65237.6,1488000,-2.6344,-2.19,-3.33,263.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1489000
  custom_metrics: {}
  date: 2021-10-25_10-39-25
  done: false
  episode_len_mean: 264.71
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.647099999999987
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5511
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09335344560991071
          cur_lr: 5.000000000000001e-05
          entropy: 1.5570808755026924
          entropy_coeff: 0.009999999999999998
          kl: 0.02503810017621779
          policy_loss: -0.03611989426943991
          total_loss: -0.03851845289270083
          vf_explained_var: 0.6104695796966553
          vf_loss: 0.010834857154016693
    num_agent_steps_sampled: 1489000
    num_agent_steps_trained: 1489000
    num_steps_sampled: 1489000
    num_steps_trained: 1489

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1489,65280.1,1489000,-2.6471,-2.19,-3.6,264.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1490000
  custom_metrics: {}
  date: 2021-10-25_10-40-02
  done: false
  episode_len_mean: 266.75
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.667499999999986
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 3
  episodes_total: 5514
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 1.5712504320674472
          entropy_coeff: 0.009999999999999998
          kl: 0.015236852289303235
          policy_loss: 0.036777396665679085
          total_loss: 0.030594470848639806
          vf_explained_var: 0.6936201453208923
          vf_loss: 0.007395957275811169
    num_agent_steps_sampled: 1490000
    num_agent_steps_trained: 1490000
    num_steps_sampled: 1490000
    num_steps_trained: 149

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1490,65316.4,1490000,-2.6675,-2.19,-3.6,266.75


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1491000
  custom_metrics: {}
  date: 2021-10-25_10-40-46
  done: false
  episode_len_mean: 267.97
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6796999999999866
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 3
  episodes_total: 5517
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 1.1749708275000255
          entropy_coeff: 0.009999999999999998
          kl: 0.008521085136115877
          policy_loss: -0.09234805458949671
          total_loss: -0.09255198025041156
          vf_explained_var: 0.6307449340820312
          vf_loss: 0.010352571432789167
    num_agent_steps_sampled: 1491000
    num_agent_steps_trained: 1491000
    num_steps_sampled: 1491000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1491,65360.7,1491000,-2.6797,-2.19,-3.6,267.97




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1492000
  custom_metrics: {}
  date: 2021-10-25_10-41-48
  done: false
  episode_len_mean: 269.23
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6922999999999866
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5521
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 1.1591557251082527
          entropy_coeff: 0.009999999999999998
          kl: 0.00754076583561477
          policy_loss: 0.0020888499087757533
          total_loss: 0.0022791868282688987
          vf_explained_var: 0.5297331213951111
          vf_loss: 0.010725960766689645
    num_agent_steps_sampled: 1492000
    num_agent_steps_trained: 1492000
    num_steps_sampled: 1492000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1492,65422.9,1492000,-2.6923,-2.19,-3.6,269.23


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1493000
  custom_metrics: {}
  date: 2021-10-25_10-42-30
  done: false
  episode_len_mean: 269.33
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6932999999999856
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5525
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 0.8865821441014607
          entropy_coeff: 0.009999999999999998
          kl: 0.007345308499576772
          policy_loss: 0.022729052644636896
          total_loss: 0.025618019203344982
          vf_explained_var: 0.5075342655181885
          vf_loss: 0.010726224144713746
    num_agent_steps_sampled: 1493000
    num_agent_steps_trained: 1493000
    num_steps_sampled: 1493000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1493,65465.2,1493000,-2.6933,-2.19,-3.6,269.33


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1494000
  custom_metrics: {}
  date: 2021-10-25_10-43-17
  done: false
  episode_len_mean: 268.93
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6892999999999865
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5529
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 0.7735588398244646
          entropy_coeff: 0.009999999999999998
          kl: 0.008916758880439583
          policy_loss: 0.016487570106983186
          total_loss: 0.021323818216721215
          vf_explained_var: 0.33674368262290955
          vf_loss: 0.01132322329406937
    num_agent_steps_sampled: 1494000
    num_agent_steps_trained: 1494000
    num_steps_sampled: 1494000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1494,65511.7,1494000,-2.6893,-2.19,-3.6,268.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1495000
  custom_metrics: {}
  date: 2021-10-25_10-44-05
  done: false
  episode_len_mean: 268.86
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6885999999999872
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5533
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14003016841486607
          cur_lr: 5.000000000000001e-05
          entropy: 0.7137951731681824
          entropy_coeff: 0.009999999999999998
          kl: 0.004762426880734583
          policy_loss: 0.018041822810967764
          total_loss: 0.023349833033151097
          vf_explained_var: 0.2848668098449707
          vf_loss: 0.011779078199631638
    num_agent_steps_sampled: 1495000
    num_agent_steps_trained: 1495000
    num_steps_sampled: 1495000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1495,65559.6,1495000,-2.6886,-2.19,-3.6,268.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1496000
  custom_metrics: {}
  date: 2021-10-25_10-44-53
  done: false
  episode_len_mean: 267.59
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6758999999999866
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5537
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07001508420743303
          cur_lr: 5.000000000000001e-05
          entropy: 0.6707517882188161
          entropy_coeff: 0.009999999999999998
          kl: 0.0049703731529608395
          policy_loss: 0.003938197013404634
          total_loss: 0.0099100551671452
          vf_explained_var: 0.1827673763036728
          vf_loss: 0.01233137504508098
    num_agent_steps_sampled: 1496000
    num_agent_steps_trained: 1496000
    num_steps_sampled: 1496000
    num_steps_trained: 1496

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1496,65607.7,1496000,-2.6759,-2.19,-3.6,267.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1497000
  custom_metrics: {}
  date: 2021-10-25_10-45-39
  done: false
  episode_len_mean: 266.03
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.660299999999986
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5541
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.6583946565786998
          entropy_coeff: 0.009999999999999998
          kl: 0.006358347040238499
          policy_loss: -0.039423706382513045
          total_loss: -0.033235971629619596
          vf_explained_var: 0.17312780022621155
          vf_loss: 0.012549091171887185
    num_agent_steps_sampled: 1497000
    num_agent_steps_trained: 1497000
    num_steps_sampled: 1497000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1497,65653.6,1497000,-2.6603,-2.19,-3.6,266.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1498000
  custom_metrics: {}
  date: 2021-10-25_10-46-23
  done: false
  episode_len_mean: 265.11
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.651099999999987
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5545
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.7652449038293626
          entropy_coeff: 0.009999999999999998
          kl: 0.005686065719361662
          policy_loss: -0.03314130331079165
          total_loss: -0.028004481063948736
          vf_explained_var: 0.20838120579719543
          vf_loss: 0.012590218325042063
    num_agent_steps_sampled: 1498000
    num_agent_steps_trained: 1498000
    num_steps_sampled: 1498000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1498,65697.9,1498000,-2.6511,-2.19,-3.6,265.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1499000
  custom_metrics: {}
  date: 2021-10-25_10-47-08
  done: false
  episode_len_mean: 264.53
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6452999999999873
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5549
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.7308740152253045
          entropy_coeff: 0.009999999999999998
          kl: 0.005640433799265744
          policy_loss: -0.011927412781450484
          total_loss: -0.006361446777979533
          vf_explained_var: 0.20992594957351685
          vf_loss: 0.012677247491147783
    num_agent_steps_sampled: 1499000
    num_agent_steps_trained: 1499000
    num_steps_sampled: 1499000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1499,65742.8,1499000,-2.6453,-2.19,-3.6,264.53




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1500000
  custom_metrics: {}
  date: 2021-10-25_10-48-12
  done: false
  episode_len_mean: 263.22
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6321999999999877
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5553
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.5990001055929396
          entropy_coeff: 0.009999999999999998
          kl: 0.005538933839124639
          policy_loss: -0.0794345492290126
          total_loss: -0.07120351584421264
          vf_explained_var: 0.18551114201545715
          vf_loss: 0.014027126971632243
    num_agent_steps_sampled: 1500000
    num_agent_steps_trained: 1500000
    num_steps_sampled: 1500000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1500,65807,1500000,-2.6322,-2.17,-3.6,263.22


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1501000
  custom_metrics: {}
  date: 2021-10-25_10-48-59
  done: false
  episode_len_mean: 261.83
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6182999999999885
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5557
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.6189257171418932
          entropy_coeff: 0.009999999999999998
          kl: 0.005202829033308469
          policy_loss: -0.12355483654472563
          total_loss: -0.11391475391056803
          vf_explained_var: 0.20962654054164886
          vf_loss: 0.015647202057556972
    num_agent_steps_sampled: 1501000
    num_agent_steps_trained: 1501000
    num_steps_sampled: 1501000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1501,65853.8,1501000,-2.6183,-2.17,-3.6,261.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1502000
  custom_metrics: {}
  date: 2021-10-25_10-49-45
  done: false
  episode_len_mean: 260.96
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6095999999999884
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 5
  episodes_total: 5562
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.5991899742020501
          entropy_coeff: 0.009999999999999998
          kl: 0.006640280153237763
          policy_loss: -0.007103619807296329
          total_loss: -0.00039170111219088234
          vf_explained_var: 0.25031739473342896
          vf_loss: 0.012471356894820929
    num_agent_steps_sampled: 1502000
    num_agent_steps_trained: 1502000
    num_steps_sampled: 1502000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1502,65899.6,1502000,-2.6096,-2.17,-3.6,260.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1503000
  custom_metrics: {}
  date: 2021-10-25_10-50-32
  done: false
  episode_len_mean: 259.98
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.599799999999988
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5566
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.5942255834738414
          entropy_coeff: 0.009999999999999998
          kl: 0.005996157823922458
          policy_loss: 0.03537612847155995
          total_loss: 0.04095581769943237
          vf_explained_var: 0.1896733194589615
          vf_loss: 0.011312032459924619
    num_agent_steps_sampled: 1503000
    num_agent_steps_trained: 1503000
    num_steps_sampled: 1503000
    num_steps_trained: 15030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1503,65946.4,1503000,-2.5998,-2.17,-3.6,259.98


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1504000
  custom_metrics: {}
  date: 2021-10-25_10-51-18
  done: false
  episode_len_mean: 259.16
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.591599999999989
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5570
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.7690998527738783
          entropy_coeff: 0.009999999999999998
          kl: 0.017105915033170997
          policy_loss: 0.020087663001484343
          total_loss: 0.026725998438066906
          vf_explained_var: 0.1299857199192047
          vf_loss: 0.013730497378855944
    num_agent_steps_sampled: 1504000
    num_agent_steps_trained: 1504000
    num_steps_sampled: 1504000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1504,65992.4,1504000,-2.5916,-2.17,-3.6,259.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1505000
  custom_metrics: {}
  date: 2021-10-25_10-52-01
  done: false
  episode_len_mean: 259.36
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.593599999999989
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 3
  episodes_total: 5573
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03500754210371652
          cur_lr: 5.000000000000001e-05
          entropy: 0.9335070868333181
          entropy_coeff: 0.009999999999999998
          kl: 0.022519045742321827
          policy_loss: -0.1133220101810164
          total_loss: -0.10848245931168397
          vf_explained_var: 0.2181171476840973
          vf_loss: 0.013386287509153287
    num_agent_steps_sampled: 1505000
    num_agent_steps_trained: 1505000
    num_steps_sampled: 1505000
    num_steps_trained: 1505

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1505,66035.3,1505000,-2.5936,-2.17,-3.6,259.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1506000
  custom_metrics: {}
  date: 2021-10-25_10-52-44
  done: false
  episode_len_mean: 259.87
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5986999999999885
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5577
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.052511313155574786
          cur_lr: 5.000000000000001e-05
          entropy: 1.018612507979075
          entropy_coeff: 0.009999999999999998
          kl: 0.01564425536053638
          policy_loss: 0.007189212056497733
          total_loss: 0.008790312210718791
          vf_explained_var: 0.22268083691596985
          vf_loss: 0.01096572678329216
    num_agent_steps_sampled: 1506000
    num_agent_steps_trained: 1506000
    num_steps_sampled: 1506000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1506,66078.6,1506000,-2.5987,-2.17,-3.6,259.87




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1507000
  custom_metrics: {}
  date: 2021-10-25_10-53-46
  done: false
  episode_len_mean: 259.71
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5970999999999886
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 5581
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.052511313155574786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0830354862742955
          entropy_coeff: 0.009999999999999998
          kl: 0.007974656565016187
          policy_loss: -0.01967665904925929
          total_loss: -0.016520685040288502
          vf_explained_var: 0.20361408591270447
          vf_loss: 0.013567569707002904
    num_agent_steps_sampled: 1507000
    num_agent_steps_trained: 1507000
    num_steps_sampled: 1507000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1507,66140.6,1507000,-2.5971,-2.17,-3.6,259.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1508000
  custom_metrics: {}
  date: 2021-10-25_10-54-24
  done: false
  episode_len_mean: 260.04
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.600399999999988
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 3
  episodes_total: 5584
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.052511313155574786
          cur_lr: 5.000000000000001e-05
          entropy: 1.20973499417305
          entropy_coeff: 0.009999999999999998
          kl: 0.022626695137539184
          policy_loss: -0.07226293111840884
          total_loss: -0.07023083517948786
          vf_explained_var: 0.2649230360984802
          vf_loss: 0.012941287364810705
    num_agent_steps_sampled: 1508000
    num_agent_steps_trained: 1508000
    num_steps_sampled: 1508000
    num_steps_trained: 1508

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1508,66178.5,1508000,-2.6004,-2.17,-3.6,260.04


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1509000
  custom_metrics: {}
  date: 2021-10-25_10-55-09
  done: false
  episode_len_mean: 260.88
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.608799999999988
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 5588
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07876696973336217
          cur_lr: 5.000000000000001e-05
          entropy: 0.9536790053049723
          entropy_coeff: 0.009999999999999998
          kl: 0.01071008377013156
          policy_loss: -0.017627281985349127
          total_loss: -0.01383106592628691
          vf_explained_var: 0.26430824398994446
          vf_loss: 0.012489405882337854
    num_agent_steps_sampled: 1509000
    num_agent_steps_trained: 1509000
    num_steps_sampled: 1509000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1509,66223.4,1509000,-2.6088,-2.17,-3.95,260.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1510000
  custom_metrics: {}
  date: 2021-10-25_10-55-55
  done: false
  episode_len_mean: 261.16
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.611599999999988
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 5592
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07876696973336217
          cur_lr: 5.000000000000001e-05
          entropy: 0.8296368963188595
          entropy_coeff: 0.009999999999999998
          kl: 0.01034910777422182
          policy_loss: -0.041825162867705025
          total_loss: -0.035822451776928374
          vf_explained_var: 0.15308548510074615
          vf_loss: 0.013483911307735576
    num_agent_steps_sampled: 1510000
    num_agent_steps_trained: 1510000
    num_steps_sampled: 1510000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1510,66269.3,1510000,-2.6116,-2.17,-3.95,261.16


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1511000
  custom_metrics: {}
  date: 2021-10-25_10-56-41
  done: false
  episode_len_mean: 260.78
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6077999999999877
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 5596
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07876696973336217
          cur_lr: 5.000000000000001e-05
          entropy: 0.9874789555867513
          entropy_coeff: 0.009999999999999998
          kl: 0.02133263404854616
          policy_loss: -0.038785057763258614
          total_loss: -0.034088328646288975
          vf_explained_var: 0.1547897458076477
          vf_loss: 0.012891210139625603
    num_agent_steps_sampled: 1511000
    num_agent_steps_trained: 1511000
    num_steps_sampled: 1511000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1511,66315,1511000,-2.6078,-2.17,-3.95,260.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1512000
  custom_metrics: {}
  date: 2021-10-25_10-57-22
  done: false
  episode_len_mean: 260.85
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6084999999999883
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 5600
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11815045460004328
          cur_lr: 5.000000000000001e-05
          entropy: 1.2022900965478684
          entropy_coeff: 0.009999999999999998
          kl: 0.009471289763921063
          policy_loss: 0.011508535676532322
          total_loss: 0.013067151937219832
          vf_explained_var: 0.16459950804710388
          vf_loss: 0.012462478669153319
    num_agent_steps_sampled: 1512000
    num_agent_steps_trained: 1512000
    num_steps_sampled: 1512000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1512,66356.7,1512000,-2.6085,-2.17,-3.95,260.85


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1513000
  custom_metrics: {}
  date: 2021-10-25_10-58-04
  done: false
  episode_len_mean: 260.36
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.603599999999988
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 4
  episodes_total: 5604
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11815045460004328
          cur_lr: 5.000000000000001e-05
          entropy: 1.058427662981881
          entropy_coeff: 0.009999999999999998
          kl: 0.0105102733358885
          policy_loss: 0.03240355178713798
          total_loss: 0.033747646792067425
          vf_explained_var: 0.11288266628980637
          vf_loss: 0.010686579915798373
    num_agent_steps_sampled: 1513000
    num_agent_steps_trained: 1513000
    num_steps_sampled: 1513000
    num_steps_trained: 151300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1513,66397.9,1513000,-2.6036,-2.17,-3.95,260.36


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1514000
  custom_metrics: {}
  date: 2021-10-25_10-58-45
  done: false
  episode_len_mean: 260.04
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.600399999999989
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 5607
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11815045460004328
          cur_lr: 5.000000000000001e-05
          entropy: 1.121412154701021
          entropy_coeff: 0.009999999999999998
          kl: 0.04716580841973699
          policy_loss: -0.007524036781655418
          total_loss: -0.00352836185031467
          vf_explained_var: 0.004105777014046907
          vf_loss: 0.009637131650621692
    num_agent_steps_sampled: 1514000
    num_agent_steps_trained: 1514000
    num_steps_sampled: 1514000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1514,66439.5,1514000,-2.6004,-2.17,-3.95,260.04




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1515000
  custom_metrics: {}
  date: 2021-10-25_10-59-31
  done: false
  episode_len_mean: 262.42
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.624199999999988
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 5610
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17722568190006485
          cur_lr: 5.000000000000001e-05
          entropy: 1.632050421502855
          entropy_coeff: 0.009999999999999998
          kl: 0.0165614461980257
          policy_loss: 0.023976410180330275
          total_loss: 0.016418690068854228
          vf_explained_var: -0.06009507179260254
          vf_loss: 0.00582766874641594
    num_agent_steps_sampled: 1515000
    num_agent_steps_trained: 1515000
    num_steps_sampled: 1515000
    num_steps_trained: 151500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1515,66485.2,1515000,-2.6242,-2.17,-4.21,262.42


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1516000
  custom_metrics: {}
  date: 2021-10-25_11-00-10
  done: false
  episode_len_mean: 263.13
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6312999999999875
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 5613
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17722568190006485
          cur_lr: 5.000000000000001e-05
          entropy: 1.2807190001010895
          entropy_coeff: 0.009999999999999998
          kl: 0.018326665194375705
          policy_loss: 0.020480722354518043
          total_loss: 0.01881379783153534
          vf_explained_var: 0.04902391880750656
          vf_loss: 0.007892311467892594
    num_agent_steps_sampled: 1516000
    num_agent_steps_trained: 1516000
    num_steps_sampled: 1516000
    num_steps_trained: 151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1516,66524.2,1516000,-2.6313,-2.17,-4.21,263.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1517000
  custom_metrics: {}
  date: 2021-10-25_11-00-41
  done: false
  episode_len_mean: 263.93
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6392999999999875
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 2
  episodes_total: 5615
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17722568190006485
          cur_lr: 5.000000000000001e-05
          entropy: 1.5460407780276404
          entropy_coeff: 0.009999999999999998
          kl: 0.016993422930454787
          policy_loss: -0.1045626531044642
          total_loss: -0.10690201669931412
          vf_explained_var: -0.11413463950157166
          vf_loss: 0.010109374273775352
    num_agent_steps_sampled: 1517000
    num_agent_steps_trained: 1517000
    num_steps_sampled: 1517000
    num_steps_trained: 151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1517,66555.5,1517000,-2.6393,-2.17,-4.44,263.93


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1518000
  custom_metrics: {}
  date: 2021-10-25_11-01-11
  done: false
  episode_len_mean: 267.82
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.678199999999986
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5618
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17722568190006485
          cur_lr: 5.000000000000001e-05
          entropy: 1.89888739850786
          entropy_coeff: 0.009999999999999998
          kl: 0.013192816228024384
          policy_loss: 0.01570204057627254
          total_loss: 0.005618029750055737
          vf_explained_var: 0.1713111400604248
          vf_loss: 0.006566760095302016
    num_agent_steps_sampled: 1518000
    num_agent_steps_trained: 1518000
    num_steps_sampled: 1518000
    num_steps_trained: 1518000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1518,66585.3,1518000,-2.6782,-2.17,-4.44,267.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1519000
  custom_metrics: {}
  date: 2021-10-25_11-01-47
  done: false
  episode_len_mean: 270.11
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.701099999999986
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5621
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17722568190006485
          cur_lr: 5.000000000000001e-05
          entropy: 1.6552833689583673
          entropy_coeff: 0.009999999999999998
          kl: 0.020612007987611796
          policy_loss: 0.03637923863199022
          total_loss: 0.03378359509838952
          vf_explained_var: 0.25316449999809265
          vf_loss: 0.010304212319897487
    num_agent_steps_sampled: 1519000
    num_agent_steps_trained: 1519000
    num_steps_sampled: 1519000
    num_steps_trained: 151900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1519,66621.2,1519000,-2.7011,-2.17,-4.44,270.11


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1520000
  custom_metrics: {}
  date: 2021-10-25_11-02-24
  done: false
  episode_len_mean: 271.72
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.717199999999986
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5624
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2658385228500974
          cur_lr: 5.000000000000001e-05
          entropy: 1.567201773987876
          entropy_coeff: 0.009999999999999998
          kl: 0.011952825165645543
          policy_loss: -0.03889217641618517
          total_loss: -0.0401261427336269
          vf_explained_var: 0.15401655435562134
          vf_loss: 0.011260526606606112
    num_agent_steps_sampled: 1520000
    num_agent_steps_trained: 1520000
    num_steps_sampled: 1520000
    num_steps_trained: 1520000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1520,66657.9,1520000,-2.7172,-2.17,-4.44,271.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1521000
  custom_metrics: {}
  date: 2021-10-25_11-03-00
  done: false
  episode_len_mean: 274.57
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.745699999999985
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5627
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2658385228500974
          cur_lr: 5.000000000000001e-05
          entropy: 2.034685712390476
          entropy_coeff: 0.009999999999999998
          kl: 0.012515143732117116
          policy_loss: 0.033619227011998494
          total_loss: 0.026441730393303765
          vf_explained_var: 0.4565577208995819
          vf_loss: 0.009842349270669123
    num_agent_steps_sampled: 1521000
    num_agent_steps_trained: 1521000
    num_steps_sampled: 1521000
    num_steps_trained: 1521000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1521,66694.1,1521000,-2.7457,-2.17,-4.44,274.57


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1522000
  custom_metrics: {}
  date: 2021-10-25_11-03-40
  done: false
  episode_len_mean: 275.81
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.758099999999985
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5631
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2658385228500974
          cur_lr: 5.000000000000001e-05
          entropy: 1.2327418718073102
          entropy_coeff: 0.009999999999999998
          kl: 0.008574108222686366
          policy_loss: -0.064330076922973
          total_loss: -0.05922499837146865
          vf_explained_var: 0.298241525888443
          vf_loss: 0.015153168105623789
    num_agent_steps_sampled: 1522000
    num_agent_steps_trained: 1522000
    num_steps_sampled: 1522000
    num_steps_trained: 1522000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1522,66733.9,1522000,-2.7581,-2.17,-4.44,275.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1523000
  custom_metrics: {}
  date: 2021-10-25_11-04-17
  done: false
  episode_len_mean: 277.67
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.776699999999985
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5634
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2658385228500974
          cur_lr: 5.000000000000001e-05
          entropy: 1.367139168580373
          entropy_coeff: 0.009999999999999998
          kl: 0.022491987839236394
          policy_loss: 0.0038111289342244465
          total_loss: 0.006813935438791911
          vf_explained_var: 0.5549349784851074
          vf_loss: 0.010694960409051015
    num_agent_steps_sampled: 1523000
    num_agent_steps_trained: 1523000
    num_steps_sampled: 1523000
    num_steps_trained: 152300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1523,66771.3,1523000,-2.7767,-2.17,-4.44,277.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1524000
  custom_metrics: {}
  date: 2021-10-25_11-04-55
  done: false
  episode_len_mean: 279.64
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.796399999999984
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5637
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.5625634418593513
          entropy_coeff: 0.009999999999999998
          kl: 0.017136738053090638
          policy_loss: -0.09274589013722208
          total_loss: -0.09549698324667083
          vf_explained_var: 0.7433381676673889
          vf_loss: 0.006041134797528179
    num_agent_steps_sampled: 1524000
    num_agent_steps_trained: 1524000
    num_steps_sampled: 1524000
    num_steps_trained: 152400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1524,66808.8,1524000,-2.7964,-2.17,-4.44,279.64




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1525000
  custom_metrics: {}
  date: 2021-10-25_11-05-59
  done: false
  episode_len_mean: 280.26
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.802599999999984
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5641
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 0.9270995676517486
          entropy_coeff: 0.009999999999999998
          kl: 0.006038750654323907
          policy_loss: 0.026583579265409045
          total_loss: 0.0303649150662952
          vf_explained_var: 0.6035964488983154
          vf_loss: 0.010644333005054957
    num_agent_steps_sampled: 1525000
    num_agent_steps_trained: 1525000
    num_steps_sampled: 1525000
    num_steps_trained: 1525000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1525,66873.2,1525000,-2.8026,-2.17,-4.44,280.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1526000
  custom_metrics: {}
  date: 2021-10-25_11-06-38
  done: false
  episode_len_mean: 281.73
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8172999999999835
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5645
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.2879383689827388
          entropy_coeff: 0.009999999999999998
          kl: 0.009486903525010717
          policy_loss: -0.09862856583462822
          total_loss: -0.10077074236339993
          vf_explained_var: 0.8222385048866272
          vf_loss: 0.006954230339680281
    num_agent_steps_sampled: 1526000
    num_agent_steps_trained: 1526000
    num_steps_sampled: 1526000
    num_steps_trained: 15260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1526,66912.1,1526000,-2.8173,-2.17,-4.44,281.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1527000
  custom_metrics: {}
  date: 2021-10-25_11-07-19
  done: false
  episode_len_mean: 282.6
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8259999999999836
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5648
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.0723765393098195
          entropy_coeff: 0.009999999999999998
          kl: 0.00895946126807896
          policy_loss: 0.03510460990170638
          total_loss: 0.03102252831061681
          vf_explained_var: 0.9093896746635437
          vf_loss: 0.003069028756322546
    num_agent_steps_sampled: 1527000
    num_agent_steps_trained: 1527000
    num_steps_sampled: 1527000
    num_steps_trained: 1527000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1527,66952.9,1527000,-2.826,-2.17,-4.44,282.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1528000
  custom_metrics: {}
  date: 2021-10-25_11-08-00
  done: false
  episode_len_mean: 284.6
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8459999999999837
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5652
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.2120192527770997
          entropy_coeff: 0.009999999999999998
          kl: 0.007223652353462108
          policy_loss: -0.0657722090681394
          total_loss: -0.06962606319122844
          vf_explained_var: 0.8800345659255981
          vf_loss: 0.005385852486102118
    num_agent_steps_sampled: 1528000
    num_agent_steps_trained: 1528000
    num_steps_sampled: 1528000
    num_steps_trained: 1528000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1528,66993.8,1528000,-2.846,-2.31,-4.44,284.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1529000
  custom_metrics: {}
  date: 2021-10-25_11-08-41
  done: false
  episode_len_mean: 285.51
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.855099999999983
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5655
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.1708355764547984
          entropy_coeff: 0.009999999999999998
          kl: 0.04033022151846855
          policy_loss: -0.011288093527158101
          total_loss: -0.003089362879594167
          vf_explained_var: 0.903192400932312
          vf_loss: 0.0038250961405639017
    num_agent_steps_sampled: 1529000
    num_agent_steps_trained: 1529000
    num_steps_sampled: 1529000
    num_steps_trained: 15290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1529,67034.8,1529000,-2.8551,-2.31,-4.44,285.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1530000
  custom_metrics: {}
  date: 2021-10-25_11-09-23
  done: false
  episode_len_mean: 286.9
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.868999999999982
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5659
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9620404084523518
          entropy_coeff: 0.009999999999999998
          kl: 0.007143514755651164
          policy_loss: -0.10199463338487678
          total_loss: -0.10259341206401587
          vf_explained_var: 0.8983262181282043
          vf_loss: 0.004748832439589831
    num_agent_steps_sampled: 1530000
    num_agent_steps_trained: 1530000
    num_steps_sampled: 1530000
    num_steps_trained: 1530000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1530,67076.9,1530000,-2.869,-2.31,-4.44,286.9


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1531000
  custom_metrics: {}
  date: 2021-10-25_11-10-08
  done: false
  episode_len_mean: 287.72
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.877199999999982
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5663
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.6898233460055457
          entropy_coeff: 0.009999999999999998
          kl: 0.009107030930345733
          policy_loss: -0.07159712380833096
          total_loss: -0.06890371185210016
          vf_explained_var: 0.9029685854911804
          vf_loss: 0.004144393932074308
    num_agent_steps_sampled: 1531000
    num_agent_steps_trained: 1531000
    num_steps_sampled: 1531000
    num_steps_trained: 153100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1531,67121.4,1531000,-2.8772,-2.31,-4.44,287.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1532000
  custom_metrics: {}
  date: 2021-10-25_11-10-52
  done: false
  episode_len_mean: 288.26
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8825999999999823
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5666
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.49550599654515587
          entropy_coeff: 0.009999999999999998
          kl: 0.007448785235578883
          policy_loss: -0.09222494380341636
          total_loss: -0.08877636128001742
          vf_explained_var: 0.884810209274292
          vf_loss: 0.003948250402592951
    num_agent_steps_sampled: 1532000
    num_agent_steps_trained: 1532000
    num_steps_sampled: 1532000
    num_steps_trained: 15320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1532,67165.6,1532000,-2.8826,-2.31,-4.44,288.26




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1533000
  custom_metrics: {}
  date: 2021-10-25_11-11-55
  done: false
  episode_len_mean: 288.15
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.881499999999982
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5670
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.48499097161822846
          entropy_coeff: 0.009999999999999998
          kl: 0.005220318105498448
          policy_loss: -0.11451472590367
          total_loss: -0.1104344548450576
          vf_explained_var: 0.8411343097686768
          vf_loss: 0.005807712819013331
    num_agent_steps_sampled: 1533000
    num_agent_steps_trained: 1533000
    num_steps_sampled: 1533000
    num_steps_trained: 1533000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1533,67229.1,1533000,-2.8815,-2.31,-4.44,288.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1534000
  custom_metrics: {}
  date: 2021-10-25_11-12-40
  done: false
  episode_len_mean: 288.03
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8802999999999823
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5674
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.4900636206070582
          entropy_coeff: 0.009999999999999998
          kl: 0.005535366393275372
          policy_loss: -0.01843867227435112
          total_loss: -0.014770605994595423
          vf_explained_var: 0.7613080143928528
          vf_loss: 0.005257794333414899
    num_agent_steps_sampled: 1534000
    num_agent_steps_trained: 1534000
    num_steps_sampled: 1534000
    num_steps_trained: 1534

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1534,67273.8,1534000,-2.8803,-2.31,-4.44,288.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1535000
  custom_metrics: {}
  date: 2021-10-25_11-13-22
  done: false
  episode_len_mean: 287.78
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.877799999999983
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5678
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.5293956094317966
          entropy_coeff: 0.009999999999999998
          kl: 0.004586924093671662
          policy_loss: 0.0082839734852314
          total_loss: 0.012456116990910636
          vf_explained_var: 0.6997315883636475
          vf_loss: 0.00672249115175671
    num_agent_steps_sampled: 1535000
    num_agent_steps_trained: 1535000
    num_steps_sampled: 1535000
    num_steps_trained: 1535000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1535,67315.1,1535000,-2.8778,-2.31,-4.44,287.78


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1536000
  custom_metrics: {}
  date: 2021-10-25_11-14-07
  done: false
  episode_len_mean: 287.83
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.8782999999999817
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5682
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29906833820635953
          cur_lr: 5.000000000000001e-05
          entropy: 0.4964518156316545
          entropy_coeff: 0.009999999999999998
          kl: 0.06293397260349336
          policy_loss: 0.03671191510640913
          total_loss: 0.05468873938338624
          vf_explained_var: 0.8492777347564697
          vf_loss: 0.004119783196236111
    num_agent_steps_sampled: 1536000
    num_agent_steps_trained: 1536000
    num_steps_sampled: 1536000
    num_steps_trained: 1536000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1536,67360.8,1536000,-2.8783,-2.32,-4.44,287.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1537000
  custom_metrics: {}
  date: 2021-10-25_11-14-44
  done: false
  episode_len_mean: 288.59
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.885899999999982
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5685
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4486025073095393
          cur_lr: 5.000000000000001e-05
          entropy: 0.6598622666464912
          entropy_coeff: 0.009999999999999998
          kl: 0.034157120959743756
          policy_loss: -0.022276442911889818
          total_loss: -0.004647866677906778
          vf_explained_var: 0.7006311416625977
          vf_loss: 0.008904229280435376
    num_agent_steps_sampled: 1537000
    num_agent_steps_trained: 1537000
    num_steps_sampled: 1537000
    num_steps_trained: 1537

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1537,67397.7,1537000,-2.8859,-2.32,-4.44,288.59


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1538000
  custom_metrics: {}
  date: 2021-10-25_11-15-17
  done: false
  episode_len_mean: 291.73
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.917299999999982
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5688
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.672903760964309
          cur_lr: 5.000000000000001e-05
          entropy: 0.7469623916678958
          entropy_coeff: 0.009999999999999998
          kl: 0.023446799144152704
          policy_loss: -0.00037377004822095234
          total_loss: 0.014448832637733883
          vf_explained_var: 0.5913312435150146
          vf_loss: 0.006514787534251809
    num_agent_steps_sampled: 1538000
    num_agent_steps_trained: 1538000
    num_steps_sampled: 1538000
    num_steps_trained: 1538

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1538,67430.6,1538000,-2.9173,-2.32,-4.44,291.73


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1539000
  custom_metrics: {}
  date: 2021-10-25_11-15-52
  done: false
  episode_len_mean: 294.69
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.946899999999981
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5691
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0093556414464633
          cur_lr: 5.000000000000001e-05
          entropy: 0.7866166121429867
          entropy_coeff: 0.009999999999999998
          kl: 0.0024277360995098953
          policy_loss: 0.008738899810446633
          total_loss: 0.011830738104052014
          vf_explained_var: 0.15878796577453613
          vf_loss: 0.00850755589393278
    num_agent_steps_sampled: 1539000
    num_agent_steps_trained: 1539000
    num_steps_sampled: 1539000
    num_steps_trained: 15390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1539,67465.5,1539000,-2.9469,-2.32,-4.44,294.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1540000
  custom_metrics: {}
  date: 2021-10-25_11-16-28
  done: false
  episode_len_mean: 296.13
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.9612999999999814
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5694
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5046778207232316
          cur_lr: 5.000000000000001e-05
          entropy: 0.8018368310398526
          entropy_coeff: 0.009999999999999998
          kl: 0.0036117233669364012
          policy_loss: 0.09190829636322127
          total_loss: 0.09324521140919792
          vf_explained_var: 0.19296160340309143
          vf_loss: 0.007532526349597093
    num_agent_steps_sampled: 1540000
    num_agent_steps_trained: 1540000
    num_steps_sampled: 1540000
    num_steps_trained: 15400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1540,67501.1,1540000,-2.9613,-2.32,-4.44,296.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1541000
  custom_metrics: {}
  date: 2021-10-25_11-17-06
  done: false
  episode_len_mean: 297.71
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.97709999999998
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5697
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2523389103616158
          cur_lr: 5.000000000000001e-05
          entropy: 0.7726964155832926
          entropy_coeff: 0.009999999999999998
          kl: 0.010349558422832514
          policy_loss: 0.03457309148377842
          total_loss: 0.036062288367085985
          vf_explained_var: 0.20421087741851807
          vf_loss: 0.006604564836015925
    num_agent_steps_sampled: 1541000
    num_agent_steps_trained: 1541000
    num_steps_sampled: 1541000
    num_steps_trained: 1541000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1541,67539.6,1541000,-2.9771,-2.32,-4.44,297.71




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1542000
  custom_metrics: {}
  date: 2021-10-25_11-18-05
  done: false
  episode_len_mean: 298.03
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9802999999999797
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5701
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2523389103616158
          cur_lr: 5.000000000000001e-05
          entropy: 0.7681817253430684
          entropy_coeff: 0.009999999999999998
          kl: 0.028481962256846474
          policy_loss: 0.005477795832686954
          total_loss: 0.01692142759760221
          vf_explained_var: 0.18232552707195282
          vf_loss: 0.011938341132675608
    num_agent_steps_sampled: 1542000
    num_agent_steps_trained: 1542000
    num_steps_sampled: 1542000
    num_steps_trained: 15420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1542,67598.8,1542000,-2.9803,-2.28,-4.44,298.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1543000
  custom_metrics: {}
  date: 2021-10-25_11-18-51
  done: false
  episode_len_mean: 297.96
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9795999999999805
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 5704
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3785083655424238
          cur_lr: 5.000000000000001e-05
          entropy: 0.803571449385749
          entropy_coeff: 0.009999999999999998
          kl: 0.0049670967308732845
          policy_loss: -0.10082561936643389
          total_loss: -0.09378564059734344
          vf_explained_var: 0.23824556171894073
          vf_loss: 0.013195602802766694
    num_agent_steps_sampled: 1543000
    num_agent_steps_trained: 1543000
    num_steps_sampled: 1543000
    num_steps_trained: 1543

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1543,67644.5,1543000,-2.9796,-2.28,-4.44,297.96


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1544000
  custom_metrics: {}
  date: 2021-10-25_11-19-32
  done: false
  episode_len_mean: 296.32
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.963199999999981
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5708
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1892541827712119
          cur_lr: 5.000000000000001e-05
          entropy: 0.8395287911097209
          entropy_coeff: 0.009999999999999998
          kl: 0.007929115267516017
          policy_loss: 0.004078324966960483
          total_loss: 0.008647728794150883
          vf_explained_var: 0.3535710573196411
          vf_loss: 0.011464072753571801
    num_agent_steps_sampled: 1544000
    num_agent_steps_trained: 1544000
    num_steps_sampled: 1544000
    num_steps_trained: 154400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1544,67685.3,1544000,-2.9632,-2.28,-4.44,296.32


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1545000
  custom_metrics: {}
  date: 2021-10-25_11-20-12
  done: false
  episode_len_mean: 294.18
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9417999999999807
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 5712
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1892541827712119
          cur_lr: 5.000000000000001e-05
          entropy: 0.9084856543276045
          entropy_coeff: 0.009999999999999998
          kl: 0.011983813389177714
          policy_loss: -0.005030689388513565
          total_loss: 0.0010745803515116374
          vf_explained_var: 0.3407820463180542
          vf_loss: 0.012922139279544354
    num_agent_steps_sampled: 1545000
    num_agent_steps_trained: 1545000
    num_steps_sampled: 1545000
    num_steps_trained: 154

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1545,67725,1545000,-2.9418,-2.28,-4.44,294.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1546000
  custom_metrics: {}
  date: 2021-10-25_11-20-52
  done: false
  episode_len_mean: 292.58
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9257999999999815
  episode_reward_min: -4.369999999999951
  episodes_this_iter: 3
  episodes_total: 5715
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1892541827712119
          cur_lr: 5.000000000000001e-05
          entropy: 0.8521608412265778
          entropy_coeff: 0.009999999999999998
          kl: 0.005424138250468818
          policy_loss: 0.04113865883813964
          total_loss: 0.042014885031514695
          vf_explained_var: 0.35007795691490173
          vf_loss: 0.008371292313353883
    num_agent_steps_sampled: 1546000
    num_agent_steps_trained: 1546000
    num_steps_sampled: 1546000
    num_steps_trained: 1546

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1546,67765,1546000,-2.9258,-2.28,-4.37,292.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1547000
  custom_metrics: {}
  date: 2021-10-25_11-21-23
  done: false
  episode_len_mean: 291.05
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.910499999999982
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5718
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1892541827712119
          cur_lr: 5.000000000000001e-05
          entropy: 0.9818383965227339
          entropy_coeff: 0.009999999999999998
          kl: 0.04955376501065985
          policy_loss: 0.03778833829694324
          total_loss: 0.04771937049097485
          vf_explained_var: -0.10869823396205902
          vf_loss: 0.010371159881146418
    num_agent_steps_sampled: 1547000
    num_agent_steps_trained: 1547000
    num_steps_sampled: 1547000
    num_steps_trained: 154700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1547,67796.5,1547000,-2.9105,-2.28,-3.99,291.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1548000
  custom_metrics: {}
  date: 2021-10-25_11-22-02
  done: false
  episode_len_mean: 290.26
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9025999999999823
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5721
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 0.9294193691677517
          entropy_coeff: 0.009999999999999998
          kl: 0.011758226050815779
          policy_loss: 0.03831576920217938
          total_loss: 0.041303581330511305
          vf_explained_var: 0.20966273546218872
          vf_loss: 0.00894406652595434
    num_agent_steps_sampled: 1548000
    num_agent_steps_trained: 1548000
    num_steps_sampled: 1548000
    num_steps_trained: 15480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1548,67834.8,1548000,-2.9026,-2.28,-3.99,290.26


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1549000
  custom_metrics: {}
  date: 2021-10-25_11-22-40
  done: false
  episode_len_mean: 290.08
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9007999999999816
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5724
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 0.9085890087816451
          entropy_coeff: 0.009999999999999998
          kl: 0.008934365673159386
          policy_loss: -0.031402077691422565
          total_loss: -0.027092092153098848
          vf_explained_var: 0.1749189794063568
          vf_loss: 0.010859575567560063
    num_agent_steps_sampled: 1549000
    num_agent_steps_trained: 1549000
    num_steps_sampled: 1549000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1549,67873.1,1549000,-2.9008,-2.28,-3.99,290.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1550000
  custom_metrics: {}
  date: 2021-10-25_11-23-19
  done: false
  episode_len_mean: 288.1
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8809999999999825
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5728
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 0.8522671593560113
          entropy_coeff: 0.009999999999999998
          kl: 0.005778911052351848
          policy_loss: 0.016270529561572603
          total_loss: 0.023142020404338836
          vf_explained_var: 0.1721569150686264
          vf_loss: 0.013753636885020468
    num_agent_steps_sampled: 1550000
    num_agent_steps_trained: 1550000
    num_steps_sampled: 1550000
    num_steps_trained: 15500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1550,67912.4,1550000,-2.881,-2.28,-3.99,288.1




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1551000
  custom_metrics: {}
  date: 2021-10-25_11-24-20
  done: false
  episode_len_mean: 287.83
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.878299999999983
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5732
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.0734041041798061
          entropy_coeff: 0.009999999999999998
          kl: 0.005697110964698342
          policy_loss: 0.017761133197281096
          total_loss: 0.020043597535954583
          vf_explained_var: 0.3462635576725006
          vf_loss: 0.011399199813604355
    num_agent_steps_sampled: 1551000
    num_agent_steps_trained: 1551000
    num_steps_sampled: 1551000
    num_steps_trained: 15510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1551,67973.4,1551000,-2.8783,-2.28,-3.99,287.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1552000
  custom_metrics: {}
  date: 2021-10-25_11-24-59
  done: false
  episode_len_mean: 287.03
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8702999999999834
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5735
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.125914082262251
          entropy_coeff: 0.009999999999999998
          kl: 0.007862033661616943
          policy_loss: 0.0221580864654647
          total_loss: 0.02138065786825286
          vf_explained_var: 0.2983916699886322
          vf_loss: 0.00824982638651919
    num_agent_steps_sampled: 1552000
    num_agent_steps_trained: 1552000
    num_steps_sampled: 1552000
    num_steps_trained: 1552000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1552,68011.7,1552000,-2.8703,-2.28,-3.99,287.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1553000
  custom_metrics: {}
  date: 2021-10-25_11-25-38
  done: false
  episode_len_mean: 286.76
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.867599999999983
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5738
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.1483292275004917
          entropy_coeff: 0.009999999999999998
          kl: 0.006202529472079574
          policy_loss: -0.1101180997159746
          total_loss: -0.1084161748488744
          vf_explained_var: 0.50010085105896
          vf_loss: 0.011424430242429177
    num_agent_steps_sampled: 1553000
    num_agent_steps_trained: 1553000
    num_steps_sampled: 1553000
    num_steps_trained: 1553000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1553,68051.1,1553000,-2.8676,-2.28,-3.99,286.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1554000
  custom_metrics: {}
  date: 2021-10-25_11-26-17
  done: false
  episode_len_mean: 288.84
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8883999999999825
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5741
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.118428681956397
          entropy_coeff: 0.009999999999999998
          kl: 0.0070486455831835805
          policy_loss: -0.12263892226748996
          total_loss: -0.12102388309107887
          vf_explained_var: 0.5903782844543457
          vf_loss: 0.010798348031110233
    num_agent_steps_sampled: 1554000
    num_agent_steps_trained: 1554000
    num_steps_sampled: 1554000
    num_steps_trained: 1554

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1554,68089.9,1554000,-2.8884,-2.28,-3.99,288.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1555000
  custom_metrics: {}
  date: 2021-10-25_11-26-54
  done: false
  episode_len_mean: 288.83
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8882999999999823
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5745
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.161847178141276
          entropy_coeff: 0.009999999999999998
          kl: 0.014357611937883874
          policy_loss: 0.0401363977127605
          total_loss: 0.03907071443067656
          vf_explained_var: 0.7085281610488892
          vf_loss: 0.006476932213020822
    num_agent_steps_sampled: 1555000
    num_agent_steps_trained: 1555000
    num_steps_sampled: 1555000
    num_steps_trained: 1555000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1555,68126.6,1555000,-2.8883,-2.28,-3.99,288.83


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1556000
  custom_metrics: {}
  date: 2021-10-25_11-27-33
  done: false
  episode_len_mean: 289.31
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.893099999999982
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5748
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.2932297243012323
          entropy_coeff: 0.009999999999999998
          kl: 0.012930579750949139
          policy_loss: -0.10298278501464261
          total_loss: -0.10444629084732797
          vf_explained_var: 0.6919552683830261
          vf_loss: 0.00779804318315453
    num_agent_steps_sampled: 1556000
    num_agent_steps_trained: 1556000
    num_steps_sampled: 1556000
    num_steps_trained: 155600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1556,68166.5,1556000,-2.8931,-2.28,-3.99,289.31


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1557000
  custom_metrics: {}
  date: 2021-10-25_11-28-10
  done: false
  episode_len_mean: 290.2
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9019999999999815
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5751
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.245644536283281
          entropy_coeff: 0.009999999999999998
          kl: 0.008369724992593634
          policy_loss: -0.07995309548245536
          total_loss: -0.08133003736535709
          vf_explained_var: 0.7366090416908264
          vf_loss: 0.008703498656137123
    num_agent_steps_sampled: 1557000
    num_agent_steps_trained: 1557000
    num_steps_sampled: 1557000
    num_steps_trained: 155700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1557,68203.5,1557000,-2.902,-2.28,-3.99,290.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1558000
  custom_metrics: {}
  date: 2021-10-25_11-28-46
  done: false
  episode_len_mean: 291.77
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.917699999999981
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5754
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.2274845083554586
          entropy_coeff: 0.009999999999999998
          kl: 0.012127855607700846
          policy_loss: -0.1704800749818484
          total_loss: -0.1689076892203755
          vf_explained_var: 0.6146321892738342
          vf_loss: 0.01040435862313542
    num_agent_steps_sampled: 1558000
    num_agent_steps_trained: 1558000
    num_steps_sampled: 1558000
    num_steps_trained: 1558000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1558,68239.1,1558000,-2.9177,-2.28,-3.99,291.77


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1559000
  custom_metrics: {}
  date: 2021-10-25_11-29-21
  done: false
  episode_len_mean: 293.35
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.933499999999981
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5757
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.1364624288347032
          entropy_coeff: 0.009999999999999998
          kl: 0.014860582182294578
          policy_loss: -0.12316938671800826
          total_loss: -0.11960755859812101
          vf_explained_var: 0.6316578388214111
          vf_loss: 0.01070781277699603
    num_agent_steps_sampled: 1559000
    num_agent_steps_trained: 1559000
    num_steps_sampled: 1559000
    num_steps_trained: 155900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1559,68274,1559000,-2.9335,-2.28,-3.99,293.35




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1560000
  custom_metrics: {}
  date: 2021-10-25_11-30-09
  done: false
  episode_len_mean: 295.76
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.95759999999998
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5760
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2838812741568178
          cur_lr: 5.000000000000001e-05
          entropy: 1.0965331945154402
          entropy_coeff: 0.009999999999999998
          kl: 0.0452075422278836
          policy_loss: -0.09460595299800237
          total_loss: -0.07957923577891456
          vf_explained_var: 0.3502000868320465
          vf_loss: 0.013158473982993099
    num_agent_steps_sampled: 1560000
    num_agent_steps_trained: 1560000
    num_steps_sampled: 1560000
    num_steps_trained: 1560000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1560,68322,1560000,-2.9576,-2.28,-3.99,295.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1561000
  custom_metrics: {}
  date: 2021-10-25_11-30-46
  done: false
  episode_len_mean: 298.06
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.98059999999998
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5763
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.2945254392094083
          entropy_coeff: 0.009999999999999998
          kl: 0.008460308825927774
          policy_loss: -0.003758086595270369
          total_loss: -0.004332570317718718
          vf_explained_var: 0.2539416253566742
          vf_loss: 0.00876818488403741
    num_agent_steps_sampled: 1561000
    num_agent_steps_trained: 1561000
    num_steps_sampled: 1561000
    num_steps_trained: 15610

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1561,68358.9,1561000,-2.9806,-2.28,-3.99,298.06


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1562000
  custom_metrics: {}
  date: 2021-10-25_11-31-22
  done: false
  episode_len_mean: 300.82
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.00819999999998
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5766
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.335606082280477
          entropy_coeff: 0.009999999999999998
          kl: 0.006932251530981482
          policy_loss: 0.015284332798586951
          total_loss: 0.014156511094835069
          vf_explained_var: 0.2792867124080658
          vf_loss: 0.009276332731436318
    num_agent_steps_sampled: 1562000
    num_agent_steps_trained: 1562000
    num_steps_sampled: 1562000
    num_steps_trained: 1562000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1562,68394.4,1562000,-3.0082,-2.28,-3.99,300.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1563000
  custom_metrics: {}
  date: 2021-10-25_11-31-58
  done: false
  episode_len_mean: 303.05
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0304999999999787
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5769
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.3956501166025796
          entropy_coeff: 0.009999999999999998
          kl: 0.006531099490139525
          policy_loss: 0.0359484174185329
          total_loss: 0.03437309248579873
          vf_explained_var: 0.23947902023792267
          vf_loss: 0.009600092755863442
    num_agent_steps_sampled: 1563000
    num_agent_steps_trained: 1563000
    num_steps_sampled: 1563000
    num_steps_trained: 156300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1563,68431,1563000,-3.0305,-2.28,-3.99,303.05


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1564000
  custom_metrics: {}
  date: 2021-10-25_11-32-37
  done: false
  episode_len_mean: 304.74
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0473999999999783
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5772
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.3467965655856662
          entropy_coeff: 0.009999999999999998
          kl: 0.00531756712252063
          policy_loss: -0.11143028363585472
          total_loss: -0.10906870944632424
          vf_explained_var: 0.11296302825212479
          vf_loss: 0.013565200629333656
    num_agent_steps_sampled: 1564000
    num_agent_steps_trained: 1564000
    num_steps_sampled: 1564000
    num_steps_trained: 1564

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1564,68469.5,1564000,-3.0474,-2.28,-3.99,304.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1565000
  custom_metrics: {}
  date: 2021-10-25_11-33-16
  done: false
  episode_len_mean: 306.64
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0663999999999785
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5776
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.327917460600535
          entropy_coeff: 0.009999999999999998
          kl: 0.00506252922639558
          policy_loss: -0.007355973621209462
          total_loss: -0.00517957458893458
          vf_explained_var: 0.15476499497890472
          vf_loss: 0.013299839881559214
    num_agent_steps_sampled: 1565000
    num_agent_steps_trained: 1565000
    num_steps_sampled: 1565000
    num_steps_trained: 1565

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1565,68508.5,1565000,-3.0664,-2.28,-3.99,306.64


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1566000
  custom_metrics: {}
  date: 2021-10-25_11-33-51
  done: false
  episode_len_mean: 308.74
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.087399999999978
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5779
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.3715626186794705
          entropy_coeff: 0.009999999999999998
          kl: 0.006271229854483806
          policy_loss: 0.03661979229913818
          total_loss: 0.03454874257246653
          vf_explained_var: 0.2682844400405884
          vf_loss: 0.008974150197011315
    num_agent_steps_sampled: 1566000
    num_agent_steps_trained: 1566000
    num_steps_sampled: 1566000
    num_steps_trained: 1566000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1566,68543.4,1566000,-3.0874,-2.28,-3.99,308.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1567000
  custom_metrics: {}
  date: 2021-10-25_11-34-26
  done: false
  episode_len_mean: 310.76
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.107599999999978
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5782
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.3851478576660157
          entropy_coeff: 0.009999999999999998
          kl: 0.007414172223182428
          policy_loss: 0.024460822012689377
          total_loss: 0.023192637579308617
          vf_explained_var: 0.3340383470058441
          vf_loss: 0.009426177282714181
    num_agent_steps_sampled: 1567000
    num_agent_steps_trained: 1567000
    num_steps_sampled: 1567000
    num_steps_trained: 15670

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1567,68578.4,1567000,-3.1076,-2.28,-3.99,310.76


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1568000
  custom_metrics: {}
  date: 2021-10-25_11-35-00
  done: false
  episode_len_mean: 311.07
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.1106999999999774
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5785
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.4184727284643386
          entropy_coeff: 0.009999999999999998
          kl: 0.007219107133616041
          policy_loss: 0.010347547299332089
          total_loss: 0.008988970186975268
          vf_explained_var: 0.2283216416835785
          vf_loss: 0.009752091898634616
    num_agent_steps_sampled: 1568000
    num_agent_steps_trained: 1568000
    num_steps_sampled: 1568000
    num_steps_trained: 1568

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1568,68612.8,1568000,-3.1107,-2.28,-3.99,311.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1569000
  custom_metrics: {}
  date: 2021-10-25_11-35-35
  done: false
  episode_len_mean: 310.38
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.103799999999977
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 2
  episodes_total: 5787
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.4310891363355849
          entropy_coeff: 0.009999999999999998
          kl: 0.008525799217969225
          policy_loss: -0.10760281731685002
          total_loss: -0.1082176234987047
          vf_explained_var: 0.4046279489994049
          vf_loss: 0.010065612057547291
    num_agent_steps_sampled: 1569000
    num_agent_steps_trained: 1569000
    num_steps_sampled: 1569000
    num_steps_trained: 156900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1569,68647.6,1569000,-3.1038,-2.28,-3.99,310.38




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1570000
  custom_metrics: {}
  date: 2021-10-25_11-36-28
  done: false
  episode_len_mean: 310.35
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.1034999999999773
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5790
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.3082573652267455
          entropy_coeff: 0.009999999999999998
          kl: 0.008845898542584942
          policy_loss: -0.1167806284295188
          total_loss: -0.11394032521380318
          vf_explained_var: 0.3003772795200348
          vf_loss: 0.01215609772544768
    num_agent_steps_sampled: 1570000
    num_agent_steps_trained: 1570000
    num_steps_sampled: 1570000
    num_steps_trained: 157000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1570,68700.6,1570000,-3.1035,-2.28,-3.99,310.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1571000
  custom_metrics: {}
  date: 2021-10-25_11-37-06
  done: false
  episode_len_mean: 310.35
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.103499999999978
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5794
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.296553905804952
          entropy_coeff: 0.009999999999999998
          kl: 0.004152073145921244
          policy_loss: 0.027812570333480835
          total_loss: 0.02753712944686413
          vf_explained_var: 0.35288554430007935
          vf_loss: 0.010922051635053423
    num_agent_steps_sampled: 1571000
    num_agent_steps_trained: 1571000
    num_steps_sampled: 1571000
    num_steps_trained: 157100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1571,68738.2,1571000,-3.1035,-2.28,-3.99,310.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1572000
  custom_metrics: {}
  date: 2021-10-25_11-37-42
  done: false
  episode_len_mean: 311.03
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.1102999999999774
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5797
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.4038288513819377
          entropy_coeff: 0.009999999999999998
          kl: 0.012861065335991542
          policy_loss: 0.08897484155992667
          total_loss: 0.087480477285054
          vf_explained_var: 0.438167542219162
          vf_loss: 0.009805665821477304
    num_agent_steps_sampled: 1572000
    num_agent_steps_trained: 1572000
    num_steps_sampled: 1572000
    num_steps_trained: 1572000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1572,68774.9,1572000,-3.1103,-2.28,-3.99,311.03


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1573000
  custom_metrics: {}
  date: 2021-10-25_11-38-16
  done: false
  episode_len_mean: 313.48
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1347999999999767
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5800
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.4069105452961392
          entropy_coeff: 0.009999999999999998
          kl: 0.014159617981049718
          policy_loss: 0.0003599397010273404
          total_loss: -0.0019089471962716845
          vf_explained_var: 0.22394530475139618
          vf_loss: 0.008785479589520643
    num_agent_steps_sampled: 1573000
    num_agent_steps_trained: 1573000
    num_steps_sampled: 1573000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1573,68808.8,1573000,-3.1348,-2.29,-3.99,313.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1574000
  custom_metrics: {}
  date: 2021-10-25_11-38-49
  done: false
  episode_len_mean: 314.81
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1480999999999772
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 2
  episodes_total: 5802
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2301607807477315
          entropy_coeff: 0.009999999999999998
          kl: 0.01338864920069085
          policy_loss: -0.11282710250880983
          total_loss: -0.11248386436038547
          vf_explained_var: 0.2802352011203766
          vf_loss: 0.009794251535398264
    num_agent_steps_sampled: 1574000
    num_agent_steps_trained: 1574000
    num_steps_sampled: 1574000
    num_steps_trained: 15740

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1574,68841.7,1574000,-3.1481,-2.29,-3.99,314.81


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1575000
  custom_metrics: {}
  date: 2021-10-25_11-39-26
  done: false
  episode_len_mean: 317.13
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1712999999999765
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5805
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2087383998764887
          entropy_coeff: 0.009999999999999998
          kl: 0.01081800858485157
          policy_loss: -0.12928136496080292
          total_loss: -0.1247977009250058
          vf_explained_var: 0.18905292451381683
          vf_loss: 0.014267773326072429
    num_agent_steps_sampled: 1575000
    num_agent_steps_trained: 1575000
    num_steps_sampled: 1575000
    num_steps_trained: 15750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1575,68878.2,1575000,-3.1713,-2.29,-3.99,317.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1576000
  custom_metrics: {}
  date: 2021-10-25_11-40-03
  done: false
  episode_len_mean: 318.24
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1823999999999764
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5809
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2409427192476061
          entropy_coeff: 0.009999999999999998
          kl: 0.012332148637197173
          policy_loss: 0.03363034601012866
          total_loss: 0.03700838776098357
          vf_explained_var: 0.18547169864177704
          vf_loss: 0.013161819345421261
    num_agent_steps_sampled: 1576000
    num_agent_steps_trained: 1576000
    num_steps_sampled: 1576000
    num_steps_trained: 15760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1576,68915.7,1576000,-3.1824,-2.29,-3.99,318.24


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1577000
  custom_metrics: {}
  date: 2021-10-25_11-40-43
  done: false
  episode_len_mean: 318.38
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.183799999999976
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5812
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.211014567481147
          entropy_coeff: 0.009999999999999998
          kl: 0.008016920271887783
          policy_loss: -0.01738615764511956
          total_loss: -0.018443922532929313
          vf_explained_var: 0.25892144441604614
          vf_loss: 0.009345489222970274
    num_agent_steps_sampled: 1577000
    num_agent_steps_trained: 1577000
    num_steps_sampled: 1577000
    num_steps_trained: 1577

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1577,68955.6,1577000,-3.1838,-2.29,-3.99,318.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1578000
  custom_metrics: {}
  date: 2021-10-25_11-41-22
  done: false
  episode_len_mean: 317.65
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.176499999999976
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5816
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.0886746790673998
          entropy_coeff: 0.009999999999999998
          kl: 0.012029339145638416
          policy_loss: -0.0006684641871187422
          total_loss: 0.004968211634291543
          vf_explained_var: 0.1889447718858719
          vf_loss: 0.013962243466327589
    num_agent_steps_sampled: 1578000
    num_agent_steps_trained: 1578000
    num_steps_sampled: 1578000
    num_steps_trained: 157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1578,68994,1578000,-3.1765,-2.29,-3.99,317.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1579000
  custom_metrics: {}
  date: 2021-10-25_11-42-01
  done: false
  episode_len_mean: 315.86
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1585999999999763
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5819
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2245912088288202
          entropy_coeff: 0.009999999999999998
          kl: 0.009576711881205648
          policy_loss: 0.03846786038743125
          total_loss: 0.038143608967463175
          vf_explained_var: 0.26419827342033386
          vf_loss: 0.00988267289293516
    num_agent_steps_sampled: 1579000
    num_agent_steps_trained: 1579000
    num_steps_sampled: 1579000
    num_steps_trained: 1579

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1579,69033.7,1579000,-3.1586,-2.29,-3.82,315.86




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1580000
  custom_metrics: {}
  date: 2021-10-25_11-43-00
  done: false
  episode_len_mean: 314.49
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1448999999999767
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5823
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.1408752348687914
          entropy_coeff: 0.009999999999999998
          kl: 0.014129702733463981
          policy_loss: -0.005970689571566052
          total_loss: -0.00025828613175286186
          vf_explained_var: 0.2084052860736847
          vf_loss: 0.014112785851789846
    num_agent_steps_sampled: 1580000
    num_agent_steps_trained: 1580000
    num_steps_sampled: 1580000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1580,69092.7,1580000,-3.1449,-2.29,-3.82,314.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1581000
  custom_metrics: {}
  date: 2021-10-25_11-43-41
  done: false
  episode_len_mean: 315.18
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.151799999999976
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5826
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.1018663618299696
          entropy_coeff: 0.009999999999999998
          kl: 0.00691625571375438
          policy_loss: 0.027778279781341553
          total_loss: 0.02819693154758877
          vf_explained_var: 0.3241219222545624
          vf_loss: 0.00996476976498444
    num_agent_steps_sampled: 1581000
    num_agent_steps_trained: 1581000
    num_steps_sampled: 1581000
    num_steps_trained: 1581000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1581,69133.5,1581000,-3.1518,-2.29,-3.82,315.18


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1582000
  custom_metrics: {}
  date: 2021-10-25_11-44-22
  done: false
  episode_len_mean: 315.38
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.153799999999976
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5830
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.043152724372016
          entropy_coeff: 0.009999999999999998
          kl: 0.011170291357037362
          policy_loss: -0.010426118887133068
          total_loss: -0.0047076226108604
          vf_explained_var: 0.2542817294597626
          vf_loss: 0.013771745252112548
    num_agent_steps_sampled: 1582000
    num_agent_steps_trained: 1582000
    num_steps_sampled: 1582000
    num_steps_trained: 1582

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1582,69174.7,1582000,-3.1538,-2.55,-3.82,315.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1583000
  custom_metrics: {}
  date: 2021-10-25_11-45-04
  done: false
  episode_len_mean: 315.02
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1501999999999764
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5833
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21291095561761336
          cur_lr: 5.000000000000001e-05
          entropy: 1.0754573040538364
          entropy_coeff: 0.009999999999999998
          kl: 0.0028321016723701574
          policy_loss: -0.02582861060897509
          total_loss: -0.02629385573996438
          vf_explained_var: 0.30072399973869324
          vf_loss: 0.009686344153144294
    num_agent_steps_sampled: 1583000
    num_agent_steps_trained: 1583000
    num_steps_sampled: 1583000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1583,69216,1583000,-3.1502,-2.55,-3.82,315.02


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1584000
  custom_metrics: {}
  date: 2021-10-25_11-45-44
  done: false
  episode_len_mean: 314.38
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.143799999999976
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5837
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.0017213980356852
          entropy_coeff: 0.009999999999999998
          kl: 0.011070269796786648
          policy_loss: 0.0015782910502619213
          total_loss: 0.0065855478660927875
          vf_explained_var: 0.29281288385391235
          vf_loss: 0.013845979577551286
    num_agent_steps_sampled: 1584000
    num_agent_steps_trained: 1584000
    num_steps_sampled: 1584000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1584,69256,1584000,-3.1438,-2.55,-3.82,314.38


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1585000
  custom_metrics: {}
  date: 2021-10-25_11-46-26
  done: false
  episode_len_mean: 313.47
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.134699999999977
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5840
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 0.9353007826540205
          entropy_coeff: 0.009999999999999998
          kl: 0.00845483963137615
          policy_loss: -0.005269611212942335
          total_loss: -0.004513558662599988
          vf_explained_var: 0.3308548033237457
          vf_loss: 0.009208994881353445
    num_agent_steps_sampled: 1585000
    num_agent_steps_trained: 1585000
    num_steps_sampled: 1585000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1585,69298.5,1585000,-3.1347,-2.55,-3.82,313.47


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1586000
  custom_metrics: {}
  date: 2021-10-25_11-47-05
  done: false
  episode_len_mean: 312.95
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.129499999999977
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5844
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 0.9753976345062256
          entropy_coeff: 0.009999999999999998
          kl: 0.01002572576468405
          policy_loss: 0.026211765532692274
          total_loss: 0.03138932697474957
          vf_explained_var: 0.28293275833129883
          vf_loss: 0.013864243382381068
    num_agent_steps_sampled: 1586000
    num_agent_steps_trained: 1586000
    num_steps_sampled: 1586000
    num_steps_trained: 1586

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1586,69337.7,1586000,-3.1295,-2.55,-3.82,312.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1587000
  custom_metrics: {}
  date: 2021-10-25_11-47-46
  done: false
  episode_len_mean: 312.76
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1275999999999766
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5847
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 0.955313511689504
          entropy_coeff: 0.009999999999999998
          kl: 0.009923256263979852
          policy_loss: -0.09778879516654544
          total_loss: -0.09245574532283676
          vf_explained_var: 0.3791569173336029
          vf_loss: 0.013829801593803696
    num_agent_steps_sampled: 1587000
    num_agent_steps_trained: 1587000
    num_steps_sampled: 1587000
    num_steps_trained: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1587,69378.6,1587000,-3.1276,-2.55,-3.82,312.76




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1588000
  custom_metrics: {}
  date: 2021-10-25_11-48-46
  done: false
  episode_len_mean: 311.46
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1145999999999776
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5851
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.0360526581605276
          entropy_coeff: 0.009999999999999998
          kl: 0.011337765682855484
          policy_loss: 0.016793460978402033
          total_loss: 0.020370985484785505
          vf_explained_var: 0.449010968208313
          vf_loss: 0.012731079871041908
    num_agent_steps_sampled: 1588000
    num_agent_steps_trained: 1588000
    num_steps_sampled: 1588000
    num_steps_trained: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1588,69438.5,1588000,-3.1146,-2.55,-3.82,311.46


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1589000
  custom_metrics: {}
  date: 2021-10-25_11-49-30
  done: false
  episode_len_mean: 309.6
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0959999999999774
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5855
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.0363871998257108
          entropy_coeff: 0.009999999999999998
          kl: 0.007813724363615366
          policy_loss: 0.027717326829830805
          total_loss: 0.027764880657196046
          vf_explained_var: 0.5698422193527222
          vf_loss: 0.00957961582009577
    num_agent_steps_sampled: 1589000
    num_agent_steps_trained: 1589000
    num_steps_sampled: 1589000
    num_steps_trained: 1589

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1589,69482,1589000,-3.096,-2.55,-3.82,309.6


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1590000
  custom_metrics: {}
  date: 2021-10-25_11-50-10
  done: false
  episode_len_mean: 307.74
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0773999999999777
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5858
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.196248718102773
          entropy_coeff: 0.009999999999999998
          kl: 0.014603693656525725
          policy_loss: 0.04540264126327303
          total_loss: 0.04367395374510023
          vf_explained_var: 0.6057980060577393
          vf_loss: 0.008679157166301996
    num_agent_steps_sampled: 1590000
    num_agent_steps_trained: 1590000
    num_steps_sampled: 1590000
    num_steps_trained: 15900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1590,69521.8,1590000,-3.0774,-2.55,-3.82,307.74


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1591000
  custom_metrics: {}
  date: 2021-10-25_11-50-52
  done: false
  episode_len_mean: 305.54
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0553999999999792
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5862
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.1737583531273736
          entropy_coeff: 0.009999999999999998
          kl: 0.009098465645726783
          policy_loss: -0.007671362078852124
          total_loss: -0.006031116139557626
          vf_explained_var: 0.5069385766983032
          vf_loss: 0.012409247106148137
    num_agent_steps_sampled: 1591000
    num_agent_steps_trained: 1591000
    num_steps_sampled: 1591000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1591,69563.9,1591000,-3.0554,-2.55,-3.82,305.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1592000
  custom_metrics: {}
  date: 2021-10-25_11-51-31
  done: false
  episode_len_mean: 303.82
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0381999999999802
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5865
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.304818054040273
          entropy_coeff: 0.009999999999999998
          kl: 0.010886140877124953
          policy_loss: 0.02669404877556695
          total_loss: 0.023467821876207988
          vf_explained_var: 0.6778876185417175
          vf_loss: 0.008663064383694696
    num_agent_steps_sampled: 1592000
    num_agent_steps_trained: 1592000
    num_steps_sampled: 1592000
    num_steps_trained: 1592

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1592,69602.8,1592000,-3.0382,-2.55,-3.82,303.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1593000
  custom_metrics: {}
  date: 2021-10-25_11-52-11
  done: false
  episode_len_mean: 302.88
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0287999999999795
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5868
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.2685152888298035
          entropy_coeff: 0.009999999999999998
          kl: 0.009226301103249549
          policy_loss: 0.02046942263841629
          total_loss: 0.017718822840187283
          vf_explained_var: 0.6341848969459534
          vf_loss: 0.008952363263556942
    num_agent_steps_sampled: 1593000
    num_agent_steps_trained: 1593000
    num_steps_sampled: 1593000
    num_steps_trained: 159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1593,69642.6,1593000,-3.0288,-2.55,-3.82,302.88


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1594000
  custom_metrics: {}
  date: 2021-10-25_11-52-51
  done: false
  episode_len_mean: 302.51
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.025099999999979
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 4
  episodes_total: 5872
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.3722080495622424
          entropy_coeff: 0.009999999999999998
          kl: 0.01337197796531942
          policy_loss: -0.011602633115318086
          total_loss: -0.011976176003615061
          vf_explained_var: 0.600567638874054
          vf_loss: 0.011925017305960258
    num_agent_steps_sampled: 1594000
    num_agent_steps_trained: 1594000
    num_steps_sampled: 1594000
    num_steps_trained: 159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1594,69682.7,1594000,-3.0251,-2.55,-3.82,302.51


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1595000
  custom_metrics: {}
  date: 2021-10-25_11-53-27
  done: false
  episode_len_mean: 302.3
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0229999999999797
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 3
  episodes_total: 5875
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.3156242622269525
          entropy_coeff: 0.009999999999999998
          kl: 0.01286536906318276
          policy_loss: 0.021401594579219817
          total_loss: 0.017603481147024366
          vf_explained_var: 0.7146561741828918
          vf_loss: 0.007988539097520212
    num_agent_steps_sampled: 1595000
    num_agent_steps_trained: 1595000
    num_steps_sampled: 1595000
    num_steps_trained: 1595

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1595,69719.4,1595000,-3.023,-2.55,-3.82,302.3


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1596000
  custom_metrics: {}
  date: 2021-10-25_11-54-00
  done: false
  episode_len_mean: 302.85
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0284999999999793
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5878
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10645547780880668
          cur_lr: 5.000000000000001e-05
          entropy: 1.377197915977902
          entropy_coeff: 0.009999999999999998
          kl: 0.03187911017901652
          policy_loss: 0.058329033272133936
          total_loss: 0.053575934221347175
          vf_explained_var: 0.6686278581619263
          vf_loss: 0.0056251730834547846
    num_agent_steps_sampled: 1596000
    num_agent_steps_trained: 1596000
    num_steps_sampled: 1596000
    num_steps_trained: 1596

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1596,69752.4,1596000,-3.0285,-2.55,-3.99,302.85




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1597000
  custom_metrics: {}
  date: 2021-10-25_11-55-01
  done: false
  episode_len_mean: 301.2
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.0119999999999787
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5881
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.411592345767551
          entropy_coeff: 0.009999999999999998
          kl: 0.008512437758550264
          policy_loss: -0.10542585609687699
          total_loss: -0.10617939457297325
          vf_explained_var: 0.5294850468635559
          vf_loss: 0.012003093461195628
    num_agent_steps_sampled: 1597000
    num_agent_steps_trained: 1597000
    num_steps_sampled: 1597000
    num_steps_trained: 15970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1597,69812.8,1597000,-3.012,-2.55,-3.99,301.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1598000
  custom_metrics: {}
  date: 2021-10-25_11-55-39
  done: false
  episode_len_mean: 299.52
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.99519999999998
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5885
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5353190077675714
          entropy_coeff: 0.009999999999999998
          kl: 0.012277676418750064
          policy_loss: -0.0028946724202897813
          total_loss: -0.005188981857564714
          vf_explained_var: 0.5711511969566345
          vf_loss: 0.011098343051142164
    num_agent_steps_sampled: 1598000
    num_agent_steps_trained: 1598000
    num_steps_sampled: 1598000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1598,69851.3,1598000,-2.9952,-2.55,-3.99,299.52


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1599000
  custom_metrics: {}
  date: 2021-10-25_11-56-19
  done: false
  episode_len_mean: 298.2
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9819999999999798
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5888
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.499904469648997
          entropy_coeff: 0.009999999999999998
          kl: 0.012352894354613635
          policy_loss: 0.052584306481811734
          total_loss: 0.04752992913126945
          vf_explained_var: 0.6055001616477966
          vf_loss: 0.00797211740993791
    num_agent_steps_sampled: 1599000
    num_agent_steps_trained: 1599000
    num_steps_sampled: 1599000
    num_steps_trained: 1599000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1599,69891,1599000,-2.982,-2.55,-3.99,298.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1600000
  custom_metrics: {}
  date: 2021-10-25_11-56-59
  done: false
  episode_len_mean: 297.41
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9740999999999804
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5891
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4299281001091004
          entropy_coeff: 0.009999999999999998
          kl: 0.01721823912064527
          policy_loss: -0.08555493859781159
          total_loss: -0.0844008293416765
          vf_explained_var: 0.4607105255126953
          vf_loss: 0.012703929913954602
    num_agent_steps_sampled: 1600000
    num_agent_steps_trained: 1600000
    num_steps_sampled: 1600000
    num_steps_trained: 16000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1600,69930.4,1600000,-2.9741,-2.55,-3.99,297.41


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1601000
  custom_metrics: {}
  date: 2021-10-25_11-57-38
  done: false
  episode_len_mean: 296.67
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9666999999999804
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5895
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3626986119482252
          entropy_coeff: 0.009999999999999998
          kl: 0.011016891889801607
          policy_loss: 0.0036981273856427936
          total_loss: 0.003918313442005051
          vf_explained_var: 0.48908063769340515
          vf_loss: 0.012087960023846891
    num_agent_steps_sampled: 1601000
    num_agent_steps_trained: 1601000
    num_steps_sampled: 1601000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1601,69969.6,1601000,-2.9667,-2.55,-3.99,296.67


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1602000
  custom_metrics: {}
  date: 2021-10-25_11-58-17
  done: false
  episode_len_mean: 295.2
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9519999999999813
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5898
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15968321671321004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3402582049369811
          entropy_coeff: 0.009999999999999998
          kl: 0.021076103565545404
          policy_loss: 0.03400318572918574
          total_loss: 0.03027594784895579
          vf_explained_var: 0.6236076951026917
          vf_loss: 0.006309840104141686
    num_agent_steps_sampled: 1602000
    num_agent_steps_trained: 1602000
    num_steps_sampled: 1602000
    num_steps_trained: 160200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1602,70008.6,1602000,-2.952,-2.55,-3.99,295.2


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1603000
  custom_metrics: {}
  date: 2021-10-25_11-58-55
  done: false
  episode_len_mean: 293.94
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9393999999999814
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5901
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4617304046948751
          entropy_coeff: 0.009999999999999998
          kl: 0.009628975979347294
          policy_loss: -0.10779632586571905
          total_loss: -0.10700173079967498
          vf_explained_var: 0.33087316155433655
          vf_loss: 0.013105522106505103
    num_agent_steps_sampled: 1603000
    num_agent_steps_trained: 1603000
    num_steps_sampled: 1603000
    num_steps_trained: 1603

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1603,70047.2,1603000,-2.9394,-2.55,-3.99,293.94


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1604000
  custom_metrics: {}
  date: 2021-10-25_11-59-32
  done: false
  episode_len_mean: 292.54
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9253999999999816
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5905
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4782685399055482
          entropy_coeff: 0.009999999999999998
          kl: 0.01182021283202095
          policy_loss: -0.023187854140996934
          total_loss: -0.022890074219968585
          vf_explained_var: 0.44110602140426636
          vf_loss: 0.012249233459846841
    num_agent_steps_sampled: 1604000
    num_agent_steps_trained: 1604000
    num_steps_sampled: 1604000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1604,70084,1604000,-2.9254,-2.55,-3.99,292.54


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1605000
  custom_metrics: {}
  date: 2021-10-25_12-00-09
  done: false
  episode_len_mean: 292.5
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9249999999999816
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5908
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4268611828486124
          entropy_coeff: 0.009999999999999998
          kl: 0.00645937895976074
          policy_loss: 0.04446256541543537
          total_loss: 0.04024659784303771
          vf_explained_var: 0.57705157995224
          vf_loss: 0.008505459977055176
    num_agent_steps_sampled: 1605000
    num_agent_steps_trained: 1605000
    num_steps_sampled: 1605000
    num_steps_trained: 1605000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1605,70120.3,1605000,-2.925,-2.55,-3.99,292.5




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1606000
  custom_metrics: {}
  date: 2021-10-25_12-01-05
  done: false
  episode_len_mean: 292.86
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.928599999999981
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5911
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.436751867665185
          entropy_coeff: 0.009999999999999998
          kl: 0.006832216502068573
          policy_loss: 0.07345760116974513
          total_loss: 0.06695515861113867
          vf_explained_var: 0.6552963852882385
          vf_loss: 0.006228586719630079
    num_agent_steps_sampled: 1606000
    num_agent_steps_trained: 1606000
    num_steps_sampled: 1606000
    num_steps_trained: 1606000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1606,70176.6,1606000,-2.9286,-2.55,-3.99,292.86


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1607000
  custom_metrics: {}
  date: 2021-10-25_12-01-47
  done: false
  episode_len_mean: 293.07
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9306999999999817
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5915
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4494172665807936
          entropy_coeff: 0.009999999999999998
          kl: 0.007698981053407532
          policy_loss: -0.020902613964345722
          total_loss: -0.02094190385606554
          vf_explained_var: 0.44643110036849976
          vf_loss: 0.012610785735564099
    num_agent_steps_sampled: 1607000
    num_agent_steps_trained: 1607000
    num_steps_sampled: 1607000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1607,70218.3,1607000,-2.9307,-2.55,-3.99,293.07


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1608000
  custom_metrics: {}
  date: 2021-10-25_12-02-27
  done: false
  episode_len_mean: 292.82
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.9281999999999813
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5918
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.364826828903622
          entropy_coeff: 0.009999999999999998
          kl: 0.009533871931283579
          policy_loss: 0.011368406398428812
          total_loss: 0.009088187333610323
          vf_explained_var: 0.4436746835708618
          vf_loss: 0.009084447380155325
    num_agent_steps_sampled: 1608000
    num_agent_steps_trained: 1608000
    num_steps_sampled: 1608000
    num_steps_trained: 160800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1608,70258.4,1608000,-2.9282,-2.55,-3.99,292.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1609000
  custom_metrics: {}
  date: 2021-10-25_12-03-08
  done: false
  episode_len_mean: 292.95
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -2.9294999999999822
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5922
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4281737155384489
          entropy_coeff: 0.009999999999999998
          kl: 0.008347451300563573
          policy_loss: -0.024651840784483486
          total_loss: -0.02410663722289933
          vf_explained_var: 0.41796112060546875
          vf_loss: 0.012827520289768775
    num_agent_steps_sampled: 1609000
    num_agent_steps_trained: 1609000
    num_steps_sampled: 1609000
    num_steps_trained: 1609

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1609,70299.4,1609000,-2.9295,-2.57,-3.99,292.95


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1610000
  custom_metrics: {}
  date: 2021-10-25_12-03-46
  done: false
  episode_len_mean: 293.09
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -2.9308999999999816
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5925
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.5226086934407552
          entropy_coeff: 0.009999999999999998
          kl: 0.00691126215147801
          policy_loss: 0.05679010003805161
          total_loss: 0.05180335375997755
          vf_explained_var: 0.6223055720329285
          vf_loss: 0.008583918284986995
    num_agent_steps_sampled: 1610000
    num_agent_steps_trained: 1610000
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1610,70337.2,1610000,-2.9309,-2.57,-3.99,293.09


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1611000
  custom_metrics: {}
  date: 2021-10-25_12-04-26
  done: false
  episode_len_mean: 293.56
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -2.9355999999999813
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5928
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4348243673642476
          entropy_coeff: 0.009999999999999998
          kl: 0.007062831078906286
          policy_loss: -0.08019765160149998
          total_loss: -0.08090929049584601
          vf_explained_var: 0.5033736824989319
          vf_loss: 0.011944884289469984
    num_agent_steps_sampled: 1611000
    num_agent_steps_trained: 1611000
    num_steps_sampled: 1611000
    num_steps_trained: 161100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1611,70377.2,1611000,-2.9356,-2.57,-3.99,293.56


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1612000
  custom_metrics: {}
  date: 2021-10-25_12-05-07
  done: false
  episode_len_mean: 293.82
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -2.938199999999981
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5932
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.594077710310618
          entropy_coeff: 0.009999999999999998
          kl: 0.009985087763612278
          policy_loss: -0.012392006483342913
          total_loss: -0.013964476519160801
          vf_explained_var: 0.5434393286705017
          vf_loss: 0.011976633448567656
    num_agent_steps_sampled: 1612000
    num_agent_steps_trained: 1612000
    num_steps_sampled: 1612000
    num_steps_trained: 161200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1612,70418.1,1612000,-2.9382,-2.57,-3.99,293.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1613000
  custom_metrics: {}
  date: 2021-10-25_12-05-45
  done: false
  episode_len_mean: 294.35
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -2.9434999999999802
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5935
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.5368656820721096
          entropy_coeff: 0.009999999999999998
          kl: 0.008296667097516518
          policy_loss: 0.07338195625278685
          total_loss: 0.06650467630889681
          vf_explained_var: 0.6491774916648865
          vf_loss: 0.0065041182329878214
    num_agent_steps_sampled: 1613000
    num_agent_steps_trained: 1613000
    num_steps_sampled: 1613000
    num_steps_trained: 1613000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1613,70456.1,1613000,-2.9435,-2.57,-3.99,294.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1614000
  custom_metrics: {}
  date: 2021-10-25_12-06-25
  done: false
  episode_len_mean: 294.29
  episode_media: {}
  episode_reward_max: -2.5599999999999894
  episode_reward_mean: -2.9428999999999808
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5939
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.5317714717653064
          entropy_coeff: 0.009999999999999998
          kl: 0.010146224063131924
          policy_loss: -0.026826239253083865
          total_loss: -0.027998020417160457
          vf_explained_var: 0.5427207350730896
          vf_loss: 0.011715662758797407
    num_agent_steps_sampled: 1614000
    num_agent_steps_trained: 1614000
    num_steps_sampled: 1614000
    num_steps_trained: 161

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1614,70496.6,1614000,-2.9429,-2.56,-3.99,294.29




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1615000
  custom_metrics: {}
  date: 2021-10-25_12-07-29
  done: false
  episode_len_mean: 293.66
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9365999999999817
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5943
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.5425164527363247
          entropy_coeff: 0.009999999999999998
          kl: 0.012349386403926448
          policy_loss: -0.024687406420707703
          total_loss: -0.025929209176037048
          vf_explained_var: 0.525100827217102
          vf_loss: 0.011225376236769889
    num_agent_steps_sampled: 1615000
    num_agent_steps_trained: 1615000
    num_steps_sampled: 1615000
    num_steps_trained: 1615

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1615,70560.5,1615000,-2.9366,-2.36,-3.99,293.66


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1616000
  custom_metrics: {}
  date: 2021-10-25_12-08-14
  done: false
  episode_len_mean: 292.97
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.929699999999981
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5946
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4477070185873244
          entropy_coeff: 0.009999999999999998
          kl: 0.010872573401028854
          policy_loss: -0.10869951753152741
          total_loss: -0.10803880898488892
          vf_explained_var: 0.37090352177619934
          vf_loss: 0.012533531534588999
    num_agent_steps_sampled: 1616000
    num_agent_steps_trained: 1616000
    num_steps_sampled: 1616000
    num_steps_trained: 16160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1616,70605.7,1616000,-2.9297,-2.36,-3.99,292.97


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1617000
  custom_metrics: {}
  date: 2021-10-25_12-08-59
  done: false
  episode_len_mean: 292.35
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.923499999999982
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5950
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4686421857939826
          entropy_coeff: 0.009999999999999998
          kl: 0.014069081179354095
          policy_loss: -0.02757742926478386
          total_loss: -0.026867809974484975
          vf_explained_var: 0.3739944100379944
          vf_loss: 0.012026143963966104
    num_agent_steps_sampled: 1617000
    num_agent_steps_trained: 1617000
    num_steps_sampled: 1617000
    num_steps_trained: 16170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1617,70650.1,1617000,-2.9235,-2.36,-3.99,292.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1618000
  custom_metrics: {}
  date: 2021-10-25_12-09-39
  done: false
  episode_len_mean: 292.44
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9243999999999812
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5954
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.4335124572118123
          entropy_coeff: 0.009999999999999998
          kl: 0.010366506352408085
          policy_loss: 0.009399452101853159
          total_loss: 0.009860305902030733
          vf_explained_var: 0.39291733503341675
          vf_loss: 0.012312942557036877
    num_agent_steps_sampled: 1618000
    num_agent_steps_trained: 1618000
    num_steps_sampled: 1618000
    num_steps_trained: 1618

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1618,70690.2,1618000,-2.9244,-2.36,-3.99,292.44


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1619000
  custom_metrics: {}
  date: 2021-10-25_12-10-24
  done: false
  episode_len_mean: 291.35
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9134999999999813
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5958
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.239524825069815
          cur_lr: 5.000000000000001e-05
          entropy: 1.3063717484474182
          entropy_coeff: 0.009999999999999998
          kl: 0.004863323405282374
          policy_loss: -0.011649578561385472
          total_loss: -0.011435180654128392
          vf_explained_var: 0.3578413128852844
          vf_loss: 0.01211323036501805
    num_agent_steps_sampled: 1619000
    num_agent_steps_trained: 1619000
    num_steps_sampled: 1619000
    num_steps_trained: 1619

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1619,70735.3,1619000,-2.9135,-2.36,-3.99,291.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1620000
  custom_metrics: {}
  date: 2021-10-25_12-11-09
  done: false
  episode_len_mean: 290.13
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9012999999999813
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 3
  episodes_total: 5961
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 1.1877959224912855
          entropy_coeff: 0.009999999999999998
          kl: 0.012201720616755552
          policy_loss: -0.11049512252211571
          total_loss: -0.10877711259656482
          vf_explained_var: 0.3039975166320801
          vf_loss: 0.01213465848316749
    num_agent_steps_sampled: 1620000
    num_agent_steps_trained: 1620000
    num_steps_sampled: 1620000
    num_steps_trained: 16200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1620,70780.2,1620000,-2.9013,-2.36,-3.99,290.13


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1621000
  custom_metrics: {}
  date: 2021-10-25_12-11-49
  done: false
  episode_len_mean: 289.28
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8927999999999825
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5965
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 1.268045757876502
          entropy_coeff: 0.009999999999999998
          kl: 0.014908377595072705
          policy_loss: -0.0099222831428051
          total_loss: -0.008874514036708407
          vf_explained_var: 0.32809460163116455
          vf_loss: 0.011942764826946789
    num_agent_steps_sampled: 1621000
    num_agent_steps_trained: 1621000
    num_steps_sampled: 1621000
    num_steps_trained: 1621

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1621,70820.2,1621000,-2.8928,-2.36,-3.99,289.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1622000
  custom_metrics: {}
  date: 2021-10-25_12-12-35
  done: false
  episode_len_mean: 287.7
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8769999999999825
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5969
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 1.1447221888436212
          entropy_coeff: 0.009999999999999998
          kl: 0.009451996407340681
          policy_loss: 0.023966917561160192
          total_loss: 0.023425775435235767
          vf_explained_var: 0.32379087805747986
          vf_loss: 0.009774085599929094
    num_agent_steps_sampled: 1622000
    num_agent_steps_trained: 1622000
    num_steps_sampled: 1622000
    num_steps_trained: 1622

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1622,70866,1622000,-2.877,-2.36,-3.99,287.7




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1623000
  custom_metrics: {}
  date: 2021-10-25_12-13-42
  done: false
  episode_len_mean: 284.65
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.8464999999999834
  episode_reward_min: -3.989999999999959
  episodes_this_iter: 4
  episodes_total: 5973
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.8972253402074178
          entropy_coeff: 0.009999999999999998
          kl: 0.010347191936242695
          policy_loss: -0.08373368953665097
          total_loss: -0.07735389694571496
          vf_explained_var: 0.17786824703216553
          vf_loss: 0.01411283989954326
    num_agent_steps_sampled: 1623000
    num_agent_steps_trained: 1623000
    num_steps_sampled: 1623000
    num_steps_trained: 1623

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1623,70933.3,1623000,-2.8465,-2.12,-3.99,284.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1624000
  custom_metrics: {}
  date: 2021-10-25_12-14-30
  done: false
  episode_len_mean: 280.65
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.806499999999984
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 5
  episodes_total: 5978
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.9595356053776212
          entropy_coeff: 0.009999999999999998
          kl: 0.005432300305363924
          policy_loss: -0.009139827224943373
          total_loss: -0.002838390486107932
          vf_explained_var: 0.1964522749185562
          vf_loss: 0.015246207939667834
    num_agent_steps_sampled: 1624000
    num_agent_steps_trained: 1624000
    num_steps_sampled: 1624000
    num_steps_trained: 162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1624,70980.9,1624000,-2.8065,-2.12,-3.32,280.65


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1625000
  custom_metrics: {}
  date: 2021-10-25_12-15-11
  done: false
  episode_len_mean: 280.68
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.806799999999983
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 5981
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 1.110183709859848
          entropy_coeff: 0.009999999999999998
          kl: 0.01153075389246712
          policy_loss: 0.0023553195926878188
          total_loss: 0.002398383534616894
          vf_explained_var: 0.3646038770675659
          vf_loss: 0.009763952256697747
    num_agent_steps_sampled: 1625000
    num_agent_steps_trained: 1625000
    num_steps_sampled: 1625000
    num_steps_trained: 162500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1625,71021.6,1625000,-2.8068,-2.12,-3.32,280.68


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1626000
  custom_metrics: {}
  date: 2021-10-25_12-15-58
  done: false
  episode_len_mean: 278.87
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.788699999999985
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5985
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.9397354973687067
          entropy_coeff: 0.009999999999999998
          kl: 0.012100582644837567
          policy_loss: -0.04870270366470019
          total_loss: -0.045967821611298455
          vf_explained_var: 0.30112507939338684
          vf_loss: 0.01068304407203363
    num_agent_steps_sampled: 1626000
    num_agent_steps_trained: 1626000
    num_steps_sampled: 1626000
    num_steps_trained: 1626

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1626,71068.5,1626000,-2.7887,-2.12,-3.32,278.87


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1627000
  custom_metrics: {}
  date: 2021-10-25_12-16-45
  done: false
  episode_len_mean: 276.49
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.7648999999999853
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 5989
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.924314581685596
          entropy_coeff: 0.009999999999999998
          kl: 0.005228514366199811
          policy_loss: -0.08446967999140421
          total_loss: -0.07768901967340046
          vf_explained_var: 0.11077331006526947
          vf_loss: 0.015397624526586797
    num_agent_steps_sampled: 1627000
    num_agent_steps_trained: 1627000
    num_steps_sampled: 1627000
    num_steps_trained: 1627

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1627,71116.1,1627000,-2.7649,-2.12,-3.32,276.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1628000
  custom_metrics: {}
  date: 2021-10-25_12-17-31
  done: false
  episode_len_mean: 273.69
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.736899999999985
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 5
  episodes_total: 5994
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 1.0024427870909374
          entropy_coeff: 0.009999999999999998
          kl: 0.008917383912730949
          policy_loss: -0.01846191684405009
          total_loss: -0.011092787235975265
          vf_explained_var: 0.1814718395471573
          vf_loss: 0.01632559076986379
    num_agent_steps_sampled: 1628000
    num_agent_steps_trained: 1628000
    num_steps_sampled: 1628000
    num_steps_trained: 16280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1628,71162.4,1628000,-2.7369,-2.12,-3.29,273.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1629000
  custom_metrics: {}
  date: 2021-10-25_12-18-17
  done: false
  episode_len_mean: 271.73
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.717299999999986
  episode_reward_min: -3.289999999999974
  episodes_this_iter: 4
  episodes_total: 5998
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.9694838331805335
          entropy_coeff: 0.009999999999999998
          kl: 0.005518804537838741
          policy_loss: -0.004942799773481157
          total_loss: -0.001298242641819848
          vf_explained_var: 0.2308533936738968
          vf_loss: 0.012678452612211307
    num_agent_steps_sampled: 1629000
    num_agent_steps_trained: 1629000
    num_steps_sampled: 1629000
    num_steps_trained: 162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1629,71208.1,1629000,-2.7173,-2.12,-3.29,271.73




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1630000
  custom_metrics: {}
  date: 2021-10-25_12-19-23
  done: false
  episode_len_mean: 268.58
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6857999999999858
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 4
  episodes_total: 6002
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1197624125349075
          cur_lr: 5.000000000000001e-05
          entropy: 0.9352269695864783
          entropy_coeff: 0.009999999999999998
          kl: 0.004311515205732361
          policy_loss: 0.017862725092305076
          total_loss: 0.02186801234881083
          vf_explained_var: 0.22224894165992737
          vf_loss: 0.012841198086324666
    num_agent_steps_sampled: 1630000
    num_agent_steps_trained: 1630000
    num_steps_sampled: 1630000
    num_steps_trained: 163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1630,71273.7,1630000,-2.6858,-2.09,-3.21,268.58


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1631000
  custom_metrics: {}
  date: 2021-10-25_12-20-06
  done: false
  episode_len_mean: 267.15
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6714999999999867
  episode_reward_min: -3.179999999999976
  episodes_this_iter: 4
  episodes_total: 6006
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05988120626745375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9775765147474077
          entropy_coeff: 0.009999999999999998
          kl: 0.02818477287495611
          policy_loss: -0.06961374005509748
          total_loss: -0.06434495951980353
          vf_explained_var: 0.22059045732021332
          vf_loss: 0.013356807403680351
    num_agent_steps_sampled: 1631000
    num_agent_steps_trained: 1631000
    num_steps_sampled: 1631000
    num_steps_trained: 163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1631,71316.9,1631000,-2.6715,-2.09,-3.18,267.15


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1632000
  custom_metrics: {}
  date: 2021-10-25_12-20-45
  done: false
  episode_len_mean: 266.49
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.664899999999987
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 6009
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08982180940118065
          cur_lr: 5.000000000000001e-05
          entropy: 1.2385284337732527
          entropy_coeff: 0.009999999999999998
          kl: 0.018880232199353485
          policy_loss: 0.041263840016391544
          total_loss: 0.03944107765952746
          vf_explained_var: 0.3239821791648865
          vf_loss: 0.00886666791047901
    num_agent_steps_sampled: 1632000
    num_agent_steps_trained: 1632000
    num_steps_sampled: 1632000
    num_steps_trained: 163200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1632,71355.4,1632000,-2.6649,-2.09,-3.9,266.49


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1633000
  custom_metrics: {}
  date: 2021-10-25_12-21-11
  done: false
  episode_len_mean: 269.35
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6934999999999865
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 2
  episodes_total: 6011
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08982180940118065
          cur_lr: 5.000000000000001e-05
          entropy: 1.6669503887494406
          entropy_coeff: 0.009999999999999998
          kl: 0.03003075273894543
          policy_loss: -0.14958433624770906
          total_loss: -0.15593534492784075
          vf_explained_var: 0.193428635597229
          vf_loss: 0.007621080977130785
    num_agent_steps_sampled: 1633000
    num_agent_steps_trained: 1633000
    num_steps_sampled: 1633000
    num_steps_trained: 16330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1633,71381.9,1633000,-2.6935,-2.09,-5.19,269.35


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1634000
  custom_metrics: {}
  date: 2021-10-25_12-21-51
  done: false
  episode_len_mean: 269.28
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.692799999999986
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 6014
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13473271410177093
          cur_lr: 5.000000000000001e-05
          entropy: 0.8064999908208847
          entropy_coeff: 0.009999999999999998
          kl: 0.012988542703162409
          policy_loss: -0.05801862022942967
          total_loss: -0.05465955618355009
          vf_explained_var: 0.22892235219478607
          vf_loss: 0.009674084859175815
    num_agent_steps_sampled: 1634000
    num_agent_steps_trained: 1634000
    num_steps_sampled: 1634000
    num_steps_trained: 163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1634,71421.4,1634000,-2.6928,-2.09,-5.19,269.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1635000
  custom_metrics: {}
  date: 2021-10-25_12-22-37
  done: false
  episode_len_mean: 269.84
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.698399999999986
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 6018
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13473271410177093
          cur_lr: 5.000000000000001e-05
          entropy: 0.5914358884096146
          entropy_coeff: 0.009999999999999998
          kl: 0.005573115639524673
          policy_loss: 0.002021296736266878
          total_loss: 0.009295997768640518
          vf_explained_var: -0.01732255145907402
          vf_loss: 0.012438174563511793
    num_agent_steps_sampled: 1635000
    num_agent_steps_trained: 1635000
    num_steps_sampled: 1635000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1635,71467.7,1635000,-2.6984,-2.09,-5.19,269.84


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1636000
  custom_metrics: {}
  date: 2021-10-25_12-23-25
  done: false
  episode_len_mean: 267.08
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.670799999999987
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 5
  episodes_total: 6023
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13473271410177093
          cur_lr: 5.000000000000001e-05
          entropy: 0.4635128951734967
          entropy_coeff: 0.009999999999999998
          kl: 0.005051011572808762
          policy_loss: -0.017083479712406795
          total_loss: -0.005711557302210066
          vf_explained_var: 0.14593827724456787
          vf_loss: 0.015326515895624955
    num_agent_steps_sampled: 1636000
    num_agent_steps_trained: 1636000
    num_steps_sampled: 1636000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1636,71515.6,1636000,-2.6708,-2.09,-5.19,267.08


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1637000
  custom_metrics: {}
  date: 2021-10-25_12-24-13
  done: false
  episode_len_mean: 264.53
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6452999999999864
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 6027
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13473271410177093
          cur_lr: 5.000000000000001e-05
          entropy: 0.5037818183501561
          entropy_coeff: 0.009999999999999998
          kl: 0.004219183184532937
          policy_loss: 0.016131995080245865
          total_loss: 0.02406366036997901
          vf_explained_var: 0.1333373486995697
          vf_loss: 0.012401018146839407
    num_agent_steps_sampled: 1637000
    num_agent_steps_trained: 1637000
    num_steps_sampled: 1637000
    num_steps_trained: 1637

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1637,71563.5,1637000,-2.6453,-2.09,-5.19,264.53




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1638000
  custom_metrics: {}
  date: 2021-10-25_12-25-23
  done: false
  episode_len_mean: 261.48
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6147999999999887
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 6031
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06736635705088546
          cur_lr: 5.000000000000001e-05
          entropy: 0.4087000283930037
          entropy_coeff: 0.009999999999999998
          kl: 0.004911209978788773
          policy_loss: -0.10014378329118093
          total_loss: -0.08937792794571983
          vf_explained_var: 0.11757795512676239
          vf_loss: 0.014522001136922175
    num_agent_steps_sampled: 1638000
    num_agent_steps_trained: 1638000
    num_steps_sampled: 1638000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1638,71633.2,1638000,-2.6148,-1.97,-5.19,261.48


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1639000
  custom_metrics: {}
  date: 2021-10-25_12-26-10
  done: false
  episode_len_mean: 258.28
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.582799999999989
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 5
  episodes_total: 6036
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03368317852544273
          cur_lr: 5.000000000000001e-05
          entropy: 0.5008239070574443
          entropy_coeff: 0.009999999999999998
          kl: 0.003957147862699194
          policy_loss: 0.01271221319006549
          total_loss: 0.021284345992737347
          vf_explained_var: 0.10976610332727432
          vf_loss: 0.013447085323019161
    num_agent_steps_sampled: 1639000
    num_agent_steps_trained: 1639000
    num_steps_sampled: 1639000
    num_steps_trained: 1639

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1639,71680.9,1639000,-2.5828,-1.97,-5.19,258.28


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1640000
  custom_metrics: {}
  date: 2021-10-25_12-27-01
  done: false
  episode_len_mean: 256.71
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5670999999999897
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 6040
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.016841589262721366
          cur_lr: 5.000000000000001e-05
          entropy: 0.3668733514017529
          entropy_coeff: 0.009999999999999998
          kl: 0.0035073597569578395
          policy_loss: 0.012182488623592589
          total_loss: 0.02186636875073115
          vf_explained_var: 0.07550965249538422
          vf_loss: 0.013293546893530421
    num_agent_steps_sampled: 1640000
    num_agent_steps_trained: 1640000
    num_steps_sampled: 1640000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1640,71731.7,1640000,-2.5671,-1.97,-5.19,256.71


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1641000
  custom_metrics: {}
  date: 2021-10-25_12-27-50
  done: false
  episode_len_mean: 254.69
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5468999999999897
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 5
  episodes_total: 6045
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008420794631360683
          cur_lr: 5.000000000000001e-05
          entropy: 0.505164486169815
          entropy_coeff: 0.009999999999999998
          kl: 0.012319122475644583
          policy_loss: -0.02574746881922086
          total_loss: -0.014008533706267675
          vf_explained_var: 0.10840515792369843
          vf_loss: 0.016686842704398765
    num_agent_steps_sampled: 1641000
    num_agent_steps_trained: 1641000
    num_steps_sampled: 1641000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1641,71780.5,1641000,-2.5469,-1.97,-5.19,254.69


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1642000
  custom_metrics: {}
  date: 2021-10-25_12-28-40
  done: false
  episode_len_mean: 253.72
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5371999999999897
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 6049
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008420794631360683
          cur_lr: 5.000000000000001e-05
          entropy: 0.5755273385180367
          entropy_coeff: 0.009999999999999998
          kl: 0.010132706077022628
          policy_loss: 0.036820399761199954
          total_loss: 0.04283394482400682
          vf_explained_var: 0.13704180717468262
          vf_loss: 0.011683494763241873
    num_agent_steps_sampled: 1642000
    num_agent_steps_trained: 1642000
    num_steps_sampled: 1642000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1642,71830.2,1642000,-2.5372,-1.97,-5.19,253.72


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1643000
  custom_metrics: {}
  date: 2021-10-25_12-29-13
  done: false
  episode_len_mean: 255.82
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.558199999999989
  episode_reward_min: -5.209999999999933
  episodes_this_iter: 3
  episodes_total: 6052
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008420794631360683
          cur_lr: 5.000000000000001e-05
          entropy: 1.4517706784937117
          entropy_coeff: 0.009999999999999998
          kl: 0.05017886261780655
          policy_loss: 0.09041112860043844
          total_loss: 0.08329427010483212
          vf_explained_var: -0.16898877918720245
          vf_loss: 0.006978303816787148
    num_agent_steps_sampled: 1643000
    num_agent_steps_trained: 1643000
    num_steps_sampled: 1643000
    num_steps_trained: 1643

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1643,71863.8,1643000,-2.5582,-1.97,-5.21,255.82


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1644000
  custom_metrics: {}
  date: 2021-10-25_12-29-59
  done: false
  episode_len_mean: 254.69
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5468999999999897
  episode_reward_min: -5.209999999999933
  episodes_this_iter: 4
  episodes_total: 6056
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012631191947041024
          cur_lr: 5.000000000000001e-05
          entropy: 0.6240266826417711
          entropy_coeff: 0.009999999999999998
          kl: 0.01306009121918219
          policy_loss: 0.03225767215092977
          total_loss: 0.038847958544890086
          vf_explained_var: 0.19446012377738953
          vf_loss: 0.01266559066457881
    num_agent_steps_sampled: 1644000
    num_agent_steps_trained: 1644000
    num_steps_sampled: 1644000
    num_steps_trained: 1644

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1644,71909.7,1644000,-2.5469,-1.97,-5.21,254.69




Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1645000
  custom_metrics: {}
  date: 2021-10-25_12-31-01
  done: false
  episode_len_mean: 254.37
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5436999999999896
  episode_reward_min: -5.209999999999933
  episodes_this_iter: 4
  episodes_total: 6060
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012631191947041024
          cur_lr: 5.000000000000001e-05
          entropy: 0.9379465255472396
          entropy_coeff: 0.009999999999999998
          kl: 0.0294471602396184
          policy_loss: 0.024241585408647855
          total_loss: 0.029096478472153346
          vf_explained_var: 0.303225040435791
          vf_loss: 0.013862405427628094
    num_agent_steps_sampled: 1645000
    num_agent_steps_trained: 1645000
    num_steps_sampled: 1645000
    num_steps_trained: 16450

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1645,71971.5,1645000,-2.5437,-1.97,-5.21,254.37


Result for PPO_my_env_b550d_00000:
  agent_timesteps_total: 1646000
  custom_metrics: {}
  date: 2021-10-25_12-31-54
  done: false
  episode_len_mean: 253.25
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.5324999999999904
  episode_reward_min: -5.209999999999933
  episodes_this_iter: 4
  episodes_total: 6064
  experiment_id: 247f30f7b9bf49cfaf16e302aa75cae8
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01894678792056154
          cur_lr: 5.000000000000001e-05
          entropy: 0.726323754257626
          entropy_coeff: 0.009999999999999998
          kl: 0.009218455244083795
          policy_loss: 0.026768711747394668
          total_loss: 0.03353107228047318
          vf_explained_var: 0.2110038548707962
          vf_loss: 0.013850938601212368
    num_agent_steps_sampled: 1646000
    num_agent_steps_trained: 1646000
    num_steps_sampled: 1646000
    num_steps_trained: 16460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b550d_00000,RUNNING,172.17.0.2:149717,1646,72024.1,1646000,-2.5325,-1.97,-5.21,253.25


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ipython-7.25.0-py3.7.egg/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_149613/3884869587.py", line 30, in <module>
    checkpoint_at_end=True)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/tune.py", line 532, in run
    runner.step()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 554, in step
    self._process_events(timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 675, in _process_events
    timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 718, in get_next_available_trial
    ready, _ = ray.wait(shuffled_results, timeout=timeout)
  File "/root/miniconda/envs/py37/lib/python3.7/sit

TypeError: object of type 'NoneType' has no len()