In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional encoder that turns a batch of 3-channel images into flat feature vectors.

    The network is five ``Conv2d`` + ``ReLU`` stages (no padding) followed by a
    ``Flatten``. For a 64x64 input the spatial size shrinks 15 -> 6 -> 4 -> 2 -> 1,
    so each image yields a 512-dimensional vector.
    """

    # One row per conv stage: (in_channels, out_channels, kernel_size, stride).
    _CONV_SPECS = (
        (3, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
        (64, 64, 3, 1),
        (64, 512, 2, 1),
    )

    def __init__(self):
        super().__init__()

        stages = []
        for in_ch, out_ch, kernel, step in self._CONV_SPECS:
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=step, padding=0))
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # The attribute name and layer positions determine the state-dict keys
        # (cnn.0.*, cnn.2.*, ...), so they are kept exactly as before.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the flattened activations."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """Custom RLlib Torch model: image encoder + target-grid encoder -> shared MLP -> heads.

    The 'pov' image goes through a ``VisualEncoder`` initialised from saved weights,
    the 'target_grid' is one-hot encoded (7 classes) and squeezed to one channel by a
    1x1x1 ``Conv3d``. Both feature vectors are concatenated and fed to a three-layer
    MLP that drives a policy-logits head and a scalar value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build all sub-networks and load the visual encoder weights.

        Args:
            obs_space, action_space, num_outputs, model_config, name: forwarded
                unchanged to ``TorchModelV2.__init__``. ``action_space.n`` sets the
                width of the action head.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        visual_features_dim = 512
        # Conv3d below outputs a single channel, so the flattened target
        # features have one value per grid cell.
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        # NOTE(review): "weigths" in the path looks like a typo but presumably
        # matches the file on disk, so it is left untouched.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU()
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate cached by forward() for value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            for module in (
                self.visual_encoder,
                self.target_encoder,
                self.policy_network,
                self.action_head,
                self.value_head,
            ):
                module.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: must contain ``'obs'`` with ``'pov'`` (batch, H, W, C image,
                scaled here from 0..255 to 0..1) and ``'target_grid'`` (batch plus
                three grid dimensions holding class ids in 0..6).
            state: passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` has one column per action.
        """
        obs = input_dict['obs']

        # Tensor.cuda() returns a copy and does not move the tensor in place, so
        # a bare `pov.cuda()` has no effect. Move the inputs explicitly, and take
        # the target device from the parameters rather than from `use_cuda` so
        # this stays correct if the model is relocated after construction
        # (e.g. by the training framework).
        device = next(self.parameters()).device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target_ids = obs['target_grid'].to(device).long()
        # one_hot appends the class axis last; move it to the channel position.
        target = one_hot(target_ids, num_classes=7).permute(0, 4, 1, 2, 3).float()

        visual_features = self.visual_encoder(pov)
        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate computed by the most recent ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name that the trainer config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Expose 'pov', 'inventory', 'compass' and optionally 'target_grid' as a Dict observation.

    Args:
        env: environment to wrap.
        include_target: when True, 'target_grid' is declared in the observation
            space and included in every returned observation.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` (and removed from
        ``info``) when available. If ``info`` is given without that key, the
        situation is logged, the underlying env is asked to reset when it
        supports ``should_reset``, and the grid of the current task is used.
        With no ``info`` at all the current task's grid is used directly.

        'target_grid' is only added to the result when ``include_target`` is set,
        so the returned dict always has the same keys as ``observation_space``.
        """
        if info is not None and 'target_grid' in info:
            target_grid = info.pop('target_grid')
        else:
            if info is not None:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        if self.include_target:
            result['target_grid'] = target_grid
        return result

In [6]:
import os
# Make CUDA device numbering follow PCI bus order and expose only device 0
# to this process.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",   # see issue #152
    CUDA_VISIBLE_DEVICES="0",
)

class RewardWrapper(gym.RewardWrapper):
    """Replace zero rewards with a fixed penalty; pass every other reward through.

    Args:
        env: environment to wrap.
        step_penalty: value substituted whenever the wrapped env returns a
            reward equal to 0. Defaults to -0.1.
    """

    def __init__(self, env, step_penalty=-0.1):
        super().__init__(env)
        self.step_penalty = step_penalty

    def reward(self, rew):
        """Return ``step_penalty`` when ``rew`` is 0, otherwise ``rew`` unchanged."""
        return self.step_penalty if rew == 0 else rew
    
def env_creator(env_config):
    """Create the wrapped IGLU environment used for training.

    Args:
        env_config: not read; present so the function matches the creator
            signature passed to ``register_env``.

    Returns:
        The 'IGLUSilentBuilder-v0' env (max 1000 steps, task preset 'C32')
        wrapped for observations, select-and-place actions, a discretised
        'human-level' action space and reward shaping, in that order.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C32']))
    #env = PovOnlyWrapper(env)
    # Each wrapper receives the result of the previous one; order is significant.
    observed = VisualObservationWrapper(base_env, include_target=True)
    discretized = Discretization(SelectAndPlace(observed), flat_action_space('human-level'))
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
# Make env_creator available under the id used as "env" in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [7]:
from ray.tune.integration.wandb import WandbLogger

# Trainer configuration: environment, PPO hyper-parameters, custom model and
# Weights & Biases logging settings.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    #"gamma": 0.99,
    "model": {
        # Specify our custom model from above.
        "custom_model": "my_torch_model",
        # Extra kwargs to be passed to your model's c'tor.
        "custom_model_config": {},
    },
    "logger_config": {
        "wandb": {
            "project": "IGLU-Minecraft",
            "name": "PPO MultiTask (C32) pretrained (AnnaCNN) (3 noops after placement) r: -0.1"
        }
    },
}

# Launch training; results are reported through the W&B logger.
analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_67041_00000,PENDING,


2021-10-21 19:45:11,244	INFO wandb.py:170 -- Already logged into W&B.
2021-10-21 19:45:11,329	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=188)[0m 2021-10-21 19:45:14,919	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=188)[0m 2021-10-21 19:45:14,920	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-21_19-46-24
  done: false
  episode_len_mean: 431.0
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -44.50000000000032
  episode_reward_min: -47.100000000000314
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.870019210709466
          entropy_coeff: 0.009999999999999998
          kl: 0.008355930164915317
          policy_loss: 0.11407052204012871
          total_loss: 0.44816574309435153
          vf_explained_var: 0.17857752740383148
          vf_loss: 0.3611242252919409
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1,63.9681,1000,-44.5,-41.9,-47.1,431


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-21_19-46-45
  done: false
  episode_len_mean: 437.25
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -44.42500000000034
  episode_reward_min: -47.100000000000314
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8584622250662908
          entropy_coeff: 0.009999999999999998
          kl: 0.011583619312315207
          policy_loss: -0.0317104760143492
          total_loss: 0.19593507125973703
          vf_explained_var: 0.2962351143360138
          vf_loss: 0.2539134434527821
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,2,84.8505,2000,-44.425,-41.9,-47.1,437.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-21_19-47-06
  done: false
  episode_len_mean: 446.1666666666667
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -45.08333333333369
  episode_reward_min: -47.100000000000314
  episodes_this_iter: 2
  episodes_total: 6
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8357856220669215
          entropy_coeff: 0.009999999999999998
          kl: 0.01264564434471071
          policy_loss: -0.03530348688364029
          total_loss: 0.24198744456387228
          vf_explained_var: 0.13043729960918427
          vf_loss: 0.3031196624868446
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,3,105.337,3000,-45.0833,-41.9,-47.1,446.167


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-21_19-47-26
  done: false
  episode_len_mean: 452.125
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -45.56250000000037
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 2
  episodes_total: 8
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8097981108559504
          entropy_coeff: 0.009999999999999998
          kl: 0.011498308436779691
          policy_loss: -0.14174783246384726
          total_loss: 0.22445397799213726
          vf_explained_var: 0.3966147005558014
          vf_loss: 0.3920001268800762
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,4,125.523,4000,-45.5625,-41.9,-47.5,452.125


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-21_19-47-44
  done: false
  episode_len_mean: 459.3
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -46.64000000000037
  episode_reward_min: -52.10000000000036
  episodes_this_iter: 2
  episodes_total: 10
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8066898425420126
          entropy_coeff: 0.009999999999999998
          kl: 0.01087134384527047
          policy_loss: -0.10355911486678654
          total_loss: 0.3922171402308676
          vf_explained_var: -0.0544796958565712
          vf_loss: 0.5216688842823108
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,5,143.164,5000,-46.64,-41.9,-52.1,459.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-21_19-48-02
  done: false
  episode_len_mean: 466.75
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -47.26666666666705
  episode_reward_min: -52.10000000000036
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8099406162897744
          entropy_coeff: 0.009999999999999998
          kl: 0.013365268082949555
          policy_loss: -0.06631735265254975
          total_loss: 0.3190912836127811
          vf_explained_var: -0.18460072576999664
          vf_loss: 0.4108349885377619
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,6,161.791,6000,-47.2667,-41.9,-52.1,466.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-21_19-48-22
  done: false
  episode_len_mean: 470.0
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -47.50714285714325
  episode_reward_min: -52.10000000000036
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7906154606077407
          entropy_coeff: 0.009999999999999998
          kl: 0.0113802377125947
          policy_loss: -0.14393462638060253
          total_loss: 0.451983509461085
          vf_explained_var: 0.47640669345855713
          vf_loss: 0.621548248661889
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,7,181.537,7000,-47.5071,-41.9,-52.1,470


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-21_19-48-40
  done: false
  episode_len_mean: 470.125
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -47.456250000000395
  episode_reward_min: -52.10000000000036
  episodes_this_iter: 2
  episodes_total: 16
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.764130809572008
          entropy_coeff: 0.009999999999999998
          kl: 0.012128803873119064
          policy_loss: -0.1233275098933114
          total_loss: 0.48756599062018924
          vf_explained_var: 0.37611034512519836
          vf_loss: 0.6361090388562944
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,8,198.862,8000,-47.4563,-41.9,-52.1,470.125


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-21_19-48-57
  done: false
  episode_len_mean: 475.1111111111111
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -47.90555555555596
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 2
  episodes_total: 18
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.759118914604187
          entropy_coeff: 0.009999999999999998
          kl: 0.012078852490878629
          policy_loss: -0.11818718281057146
          total_loss: 0.6384885827700297
          vf_explained_var: 0.21568845212459564
          vf_loss: 0.7818511882589922
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,9,216.717,9000,-47.9056,-41.9,-55.6,475.111


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-21_19-49-16
  done: false
  episode_len_mean: 478.1
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -48.165000000000404
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.690669494205051
          entropy_coeff: 0.009999999999999998
          kl: 0.014870752542641894
          policy_loss: -0.10929866565598381
          total_loss: 0.7214911573463016
          vf_explained_var: 0.24331161379814148
          vf_loss: 0.85472235944536
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,10,235.052,10000,-48.165,-41.9,-55.6,478.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-21_19-49-33
  done: false
  episode_len_mean: 482.1363636363636
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -48.53636363636405
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 2
  episodes_total: 22
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.713226554128859
          entropy_coeff: 0.009999999999999998
          kl: 0.01026421027588553
          policy_loss: -0.0932375646299786
          total_loss: 0.6475026405519909
          vf_explained_var: 0.17319241166114807
          vf_loss: 0.7658196238904363
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,11,251.905,11000,-48.5364,-41.9,-55.6,482.136


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-21_19-49-50
  done: false
  episode_len_mean: 487.0833333333333
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -49.00416666666708
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 2
  episodes_total: 24
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.706070015165541
          entropy_coeff: 0.009999999999999998
          kl: 0.010167827164075412
          policy_loss: -0.10574780172771878
          total_loss: 0.8482673191361957
          vf_explained_var: 0.07108146697282791
          vf_loss: 0.9790422518220213
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,12,269.303,12000,-49.0042,-41.9,-55.6,487.083


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-21_19-50-06
  done: false
  episode_len_mean: 490.03846153846155
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -49.276923076923495
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.679174868265788
          entropy_coeff: 0.009999999999999998
          kl: 0.014950040742959282
          policy_loss: -0.08379651109377544
          total_loss: 0.9271505003174146
          vf_explained_var: 0.3020694851875305
          vf_loss: 1.0347487446334627
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,13,284.98,13000,-49.2769,-41.9,-55.6,490.038


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-21_19-50-22
  done: false
  episode_len_mean: 494.75
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -49.728571428571854
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.69371579753028
          entropy_coeff: 0.009999999999999998
          kl: 0.010090792901651784
          policy_loss: 0.11088829338550568
          total_loss: 0.68485623035166
          vf_explained_var: 0.03738937899470329
          vf_loss: 0.5988869380826751
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,14,300.909,14000,-49.7286,-41.9,-59.1,494.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-21_19-50-58
  done: false
  episode_len_mean: 496.4
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -49.8766666666671
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 2
  episodes_total: 30
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.669465568330553
          entropy_coeff: 0.009999999999999998
          kl: 0.01296062447532834
          policy_loss: 0.11247597750690248
          total_loss: 0.6329808650745286
          vf_explained_var: 0.04360957071185112
          vf_loss: 0.5446074224853267
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,15,336.877,15000,-49.8767,-41.9,-59.1,496.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-21_19-51-16
  done: false
  episode_len_mean: 498.375
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -50.059375000000436
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6471008751127454
          entropy_coeff: 0.009999999999999998
          kl: 0.014014963005470804
          policy_loss: 0.08672250608603159
          total_loss: 0.7545508692661921
          vf_explained_var: -0.5086021423339844
          vf_loss: 0.6914963832383768
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,16,355.17,16000,-50.0594,-41.9,-59.1,498.375


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-21_19-51-33
  done: false
  episode_len_mean: 499.90909090909093
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -50.206060606061044
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.576486905415853
          entropy_coeff: 0.009999999999999998
          kl: 0.015381103348932904
          policy_loss: -0.0759546751777331
          total_loss: 0.6102874391608768
          vf_explained_var: 0.18112337589263916
          vf_loss: 0.7089307372458279
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,17,372.311,17000,-50.2061,-41.9,-59.1,499.909


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-21_19-51-50
  done: false
  episode_len_mean: 504.0857142857143
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -50.611428571429016
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 2
  episodes_total: 35
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5723727888531154
          entropy_coeff: 0.009999999999999998
          kl: 0.011366254164989265
          policy_loss: -0.0975855247841941
          total_loss: 1.2397961093319787
          vf_explained_var: -0.025136588141322136
          vf_loss: 1.360832108805577
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,18,388.575,18000,-50.6114,-41.9,-59.1,504.086


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-21_19-52-05
  done: false
  episode_len_mean: 507.64864864864865
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -50.9567567567572
  episode_reward_min: -59.10000000000057
  episodes_this_iter: 2
  episodes_total: 37
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.589824978510539
          entropy_coeff: 0.009999999999999998
          kl: 0.009862673542195294
          policy_loss: 0.03601759597659111
          total_loss: 0.7750120085146692
          vf_explained_var: 0.23784400522708893
          vf_loss: 0.7629201359632942
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,19,404.543,19000,-50.9568,-41.9,-59.1,507.649


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-21_19-52-21
  done: false
  episode_len_mean: 512.3846153846154
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -51.42051282051328
  episode_reward_min: -61.0000000000006
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.570416463745965
          entropy_coeff: 0.009999999999999998
          kl: 0.01033835197217867
          policy_loss: 0.08347965412669711
          total_loss: 0.7922902262873119
          vf_explained_var: 0.06979181617498398
          vf_loss: 0.7324470605649468
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,20,420.429,20000,-51.4205,-41.9,-61,512.385


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-21_19-52-38
  done: false
  episode_len_mean: 514.525
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -51.63000000000045
  episode_reward_min: -61.0000000000006
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5447475565804374
          entropy_coeff: 0.009999999999999998
          kl: 0.01398126310162695
          policy_loss: -0.07943584207031462
          total_loss: 0.6273413133290079
          vf_explained_var: 0.06436425447463989
          vf_loss: 0.729428361614959
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,21,436.856,21000,-51.63,-41.9,-61,514.525


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-21_19-52-54
  done: false
  episode_len_mean: 520.2380952380952
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -52.1928571428576
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 2
  episodes_total: 42
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5521687295701767
          entropy_coeff: 0.009999999999999998
          kl: 0.010116103509333346
          policy_loss: 0.09199536475870344
          total_loss: 0.8236031505796645
          vf_explained_var: 0.04376600682735443
          vf_loss: 0.7551062579085637
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,22,452.489,22000,-52.1929,-41.9,-65.5,520.238


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-21_19-53-08
  done: false
  episode_len_mean: 521.6279069767442
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -52.327906976744636
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5572037034564548
          entropy_coeff: 0.009999999999999998
          kl: 0.013210922336876981
          policy_loss: -0.06301210953129663
          total_loss: 0.6694337195820279
          vf_explained_var: -0.01649225503206253
          vf_loss: 0.7553757060836587
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 2300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,23,467.315,23000,-52.3279,-41.9,-65.5,521.628


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-21_19-53-25
  done: false
  episode_len_mean: 524.9333333333333
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -52.65111111111157
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 2
  episodes_total: 45
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.540920697318183
          entropy_coeff: 0.009999999999999998
          kl: 0.012855387696125773
          policy_loss: -0.08689102033774058
          total_loss: 1.3587060772710375
          vf_explained_var: 0.05704528093338013
          vf_loss: 1.4684352117280166
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,24,483.988,24000,-52.6511,-41.9,-65.5,524.933


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-21_19-53-42
  done: false
  episode_len_mean: 526.1702127659574
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -52.76808510638343
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 2
  episodes_total: 47
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.516576237148709
          entropy_coeff: 0.009999999999999998
          kl: 0.009299136861711244
          policy_loss: -0.07710754772027334
          total_loss: 1.3689865244759454
          vf_explained_var: 0.053020626306533813
          vf_loss: 1.4694000056634346
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,25,500.891,25000,-52.7681,-41.9,-65.5,526.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-21_19-53-58
  done: false
  episode_len_mean: 527.3469387755102
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -52.879591836735166
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 2
  episodes_total: 49
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4750089274512397
          entropy_coeff: 0.009999999999999998
          kl: 0.012627765318444291
          policy_loss: 0.09071502370966805
          total_loss: 0.8377265648709403
          vf_explained_var: -0.00045867497101426125
          vf_loss: 0.7692360691001846
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,26,517.129,26000,-52.8796,-41.9,-65.5,527.347


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-21_19-54-14
  done: false
  episode_len_mean: 529.0
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -53.03921568627499
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4521827936172484
          entropy_coeff: 0.009999999999999998
          kl: 0.011870057173874373
          policy_loss: 0.08884123398198021
          total_loss: 0.8508145186636183
          vf_explained_var: -0.22409775853157043
          vf_loss: 0.7841211104196393
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,27,532.976,27000,-53.0392,-41.9,-65.5,529


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-21_19-54-30
  done: false
  episode_len_mean: 530.0
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -53.13653846153894
  episode_reward_min: -65.50000000000055
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.376091811392042
          entropy_coeff: 0.009999999999999998
          kl: 0.015400418462821936
          policy_loss: -0.08757431871361203
          total_loss: 0.7012329722444216
          vf_explained_var: 0.3275669813156128
          vf_loss: 0.809488123924368
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,28,548.564,28000,-53.1365,-41.9,-65.5,530


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-21_19-54-44
  done: false
  episode_len_mean: 535.3518518518518
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -53.66666666666715
  episode_reward_min: -70.10000000000029
  episodes_this_iter: 2
  episodes_total: 54
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3257186465793187
          entropy_coeff: 0.009999999999999998
          kl: 0.009041294171309566
          policy_loss: 0.12650789270798365
          total_loss: 0.6170921345551809
          vf_explained_var: 0.31595349311828613
          vf_loss: 0.5120331661491138
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,29,562.679,29000,-53.6667,-41.9,-70.1,535.352


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-21_19-54-58
  done: false
  episode_len_mean: 538.5818181818182
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -53.9872727272732
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.356505537033081
          entropy_coeff: 0.009999999999999998
          kl: 0.015335185851781022
          policy_loss: -0.07212572089499897
          total_loss: 0.6787651458548175
          vf_explained_var: 0.2669464349746704
          vf_loss: 0.7713888763062035
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,30,576.863,30000,-53.9873,-41.9,-71.3,538.582


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-21_19-55-13
  done: false
  episode_len_mean: 542.6315789473684
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -54.38771929824609
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 57
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.328096848063999
          entropy_coeff: 0.009999999999999998
          kl: 0.009161788684574646
          policy_loss: 0.08872253215975232
          total_loss: 0.8751916693316566
          vf_explained_var: 0.0035085512790828943
          vf_loss: 0.8079177579221626
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,31,591.658,31000,-54.3877,-41.9,-71.3,542.632


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-21_19-55-29
  done: false
  episode_len_mean: 544.5
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -54.57241379310393
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.316037384668986
          entropy_coeff: 0.009999999999999998
          kl: 0.00891044625136161
          policy_loss: -0.05703105744388368
          total_loss: 0.7108443318141832
          vf_explained_var: 0.001343333045952022
          vf_loss: 0.7892536663346821
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,32,607.321,32000,-54.5724,-41.9,-71.3,544.5




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-21_19-55-59
  done: false
  episode_len_mean: 547.7666666666667
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -54.89500000000048
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 60
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3282202137841117
          entropy_coeff: 0.009999999999999998
          kl: 0.009203199714268568
          policy_loss: 0.0882041297852993
          total_loss: 0.8237108866373698
          vf_explained_var: 0.20289848744869232
          vf_loss: 0.7569483029966553
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,33,637.972,33000,-54.895,-41.9,-71.3,547.767


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-21_19-56-15
  done: false
  episode_len_mean: 549.672131147541
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.083606557377536
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.246173787117004
          entropy_coeff: 0.009999999999999998
          kl: 0.010342493333244492
          policy_loss: -0.10293270316388872
          total_loss: 0.6170170722736252
          vf_explained_var: 0.3356965482234955
          vf_loss: 0.7403430140059856
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,34,654.099,34000,-55.0836,-41.9,-71.3,549.672


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-21_19-56-30
  done: false
  episode_len_mean: 551.936507936508
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.306349206349694
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 63
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1807536681493125
          entropy_coeff: 0.009999999999999998
          kl: 0.010426950371881391
          policy_loss: -0.014998993774255117
          total_loss: 1.124724453356531
          vf_explained_var: 0.17325623333454132
          vf_loss: 1.1594455951617824
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,35,668.988,35000,-55.3063,-41.9,-71.3,551.937


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-21_19-56-44
  done: false
  episode_len_mean: 553.59375
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.47031250000048
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1736742575963337
          entropy_coeff: 0.009999999999999998
          kl: 0.009430747964449636
          policy_loss: -0.0750247907307413
          total_loss: 0.6570864953100681
          vf_explained_var: 0.3707999885082245
          vf_loss: 0.7519618826504383
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,36,683.117,36000,-55.4703,-41.9,-71.3,553.594


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-21_19-56-59
  done: false
  episode_len_mean: 556.8787878787879
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.79545454545503
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1385304106606378
          entropy_coeff: 0.009999999999999998
          kl: 0.01043032116910574
          policy_loss: -0.06403740843137105
          total_loss: 1.3598714219199286
          vf_explained_var: -0.18627139925956726
          vf_loss: 1.4432080760598183
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,37,697.448,37000,-55.7955,-41.9,-71.3,556.879


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-21_19-57-15
  done: false
  episode_len_mean: 558.0746268656717
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.91343283582138
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1495853635999893
          entropy_coeff: 0.009999999999999998
          kl: 0.006482062038863429
          policy_loss: -0.07039180414544212
          total_loss: 0.667331885960367
          vf_explained_var: -0.07871842384338379
          vf_loss: 0.7579231252117703
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,38,713.218,38000,-55.9134,-41.9,-71.3,558.075


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-21_19-57-32
  done: false
  episode_len_mean: 559.3913043478261
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -56.042028985507734
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 69
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.166685207684835
          entropy_coeff: 0.009999999999999998
          kl: 0.01031423021780847
          policy_loss: -0.09407375040981504
          total_loss: 1.2925884127616882
          vf_explained_var: 0.059123892337083817
          vf_loss: 1.406266188952658
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,39,730.591,39000,-56.042,-41.9,-71.3,559.391


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-21_19-57-50
  done: false
  episode_len_mean: 559.7183098591549
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -56.07183098591599
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 71
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1862266884909736
          entropy_coeff: 0.009999999999999998
          kl: 0.010505183475538053
          policy_loss: -0.0885338748494784
          total_loss: 1.2790832075807783
          vf_explained_var: 0.14812149107456207
          vf_loss: 1.3873783036238618
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,40,748.254,40000,-56.0718,-41.9,-71.3,559.718


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-21_19-58-07
  done: false
  episode_len_mean: 559.5068493150685
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -56.04794520547994
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.207313317722744
          entropy_coeff: 0.009999999999999998
          kl: 0.009171290589966807
          policy_loss: 0.1057785087161594
          total_loss: 0.8280019485288196
          vf_explained_var: 0.07067710906267166
          vf_loss: 0.7424623174127192
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,41,765.775,41000,-56.0479,-41.9,-71.3,559.507


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-21_19-58-23
  done: false
  episode_len_mean: 559.1466666666666
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -56.009333333333814
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.20032455391354
          entropy_coeff: 0.009999999999999998
          kl: 0.006892334212402663
          policy_loss: 0.10128338502513037
          total_loss: 0.8510569863849216
          vf_explained_var: 0.029578253626823425
          vf_loss: 0.7703983818594781
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,42,781.196,42000,-56.0093,-41.9,-71.3,559.147


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-21_19-58-42
  done: false
  episode_len_mean: 558.0519480519481
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.89740259740309
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 77
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2370052019755047
          entropy_coeff: 0.009999999999999998
          kl: 0.010861669024947753
          policy_loss: 0.08612267706129285
          total_loss: 0.816722442375289
          vf_explained_var: -0.2588679790496826
          vf_loss: 0.7507974765780899
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,43,800.409,43000,-55.8974,-41.9,-71.3,558.052


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-21_19-59-02
  done: false
  episode_len_mean: 556.9113924050633
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.78101265822834
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 79
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2262771209081014
          entropy_coeff: 0.009999999999999998
          kl: 0.012062316749502422
          policy_loss: 0.0712088801794582
          total_loss: 0.8052570192350281
          vf_explained_var: 0.10937660932540894
          vf_loss: 0.7538984482073121
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,44,820.347,44000,-55.781,-41.9,-71.3,556.911


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-21_19-59-22
  done: false
  episode_len_mean: 555.3086419753087
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.61851851851902
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 81
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.229884468184577
          entropy_coeff: 0.009999999999999998
          kl: 0.009139992916891949
          policy_loss: 0.07760500642988417
          total_loss: 0.8216055770715077
          vf_explained_var: 0.4203053414821625
          vf_loss: 0.7644714321734176
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,45,840.607,45000,-55.6185,-41.9,-71.3,555.309


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-21_19-59-41
  done: false
  episode_len_mean: 553.5421686746988
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.43975903614507
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 83
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2007546769248116
          entropy_coeff: 0.009999999999999998
          kl: 0.008522418083959267
          policy_loss: 0.08893601828151279
          total_loss: 0.8360980176263385
          vf_explained_var: -0.1651369333267212
          vf_loss: 0.7674650710490015
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,46,858.949,46000,-55.4398,-41.9,-71.3,553.542


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-21_19-59-59
  done: false
  episode_len_mean: 551.8588235294118
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.26941176470637
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 85
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.205797741148207
          entropy_coeff: 0.009999999999999998
          kl: 0.012831855565469752
          policy_loss: 0.09805714090665181
          total_loss: 0.8503721667660608
          vf_explained_var: -0.26076143980026245
          vf_loss: 0.7718066317236258
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,47,877.062,47000,-55.2694,-41.9,-71.3,551.859


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-21_20-00-18
  done: false
  episode_len_mean: 550.4827586206897
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -55.129885057471746
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 87
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1600095589955646
          entropy_coeff: 0.009999999999999998
          kl: 0.011186673711174914
          policy_loss: 0.14321054302983813
          total_loss: 0.5480725601315498
          vf_explained_var: 0.06549254059791565
          vf_loss: 0.4242247662817438
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,48,896.325,48000,-55.1299,-41.9,-71.3,550.483


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-21_20-00-38
  done: false
  episode_len_mean: 549.0112359550562
  episode_media: {}
  episode_reward_max: -41.900000000000325
  episode_reward_mean: -54.98089887640498
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 89
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1364802175097997
          entropy_coeff: 0.009999999999999998
          kl: 0.011951165411571433
          policy_loss: 0.08967369198799133
          total_loss: 0.8921126898792054
          vf_explained_var: 0.06834074854850769
          vf_loss: 0.8214135594148603
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,49,916.121,49000,-54.9809,-41.9,-71.3,549.011




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-21_20-01-16
  done: false
  episode_len_mean: 545.934065934066
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -54.671428571429054
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 91
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0935128741794164
          entropy_coeff: 0.009999999999999998
          kl: 0.01243627611124904
          policy_loss: -0.09752343810266918
          total_loss: 1.3854776942067677
          vf_explained_var: -0.14214102923870087
          vf_loss: 1.5014490101072524
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,50,954.643,50000,-54.6714,-38.2,-71.3,545.934


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-21_20-01-36
  done: false
  episode_len_mean: 543.6774193548387
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -54.44408602150586
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 93
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0408752547370064
          entropy_coeff: 0.009999999999999998
          kl: 0.01136256548358653
          policy_loss: -0.10575901865959167
          total_loss: 1.3188215649790234
          vf_explained_var: 0.16893325746059418
          vf_loss: 1.4427168213658863
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,51,974.762,51000,-54.4441,-38.2,-71.3,543.677


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-21_20-01-56
  done: false
  episode_len_mean: 541.125
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -54.1864583333338
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 96
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0550785091188217
          entropy_coeff: 0.009999999999999998
          kl: 0.01360095404595641
          policy_loss: 0.03567056937350167
          total_loss: 1.3720797455973095
          vf_explained_var: 0.26267391443252563
          vf_loss: 1.3542397735019525
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,52,993.918,52000,-54.1865,-38.2,-71.3,541.125


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-21_20-02-16
  done: false
  episode_len_mean: 539.030612244898
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -53.9755102040821
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 98
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.000629528363546
          entropy_coeff: 0.009999999999999998
          kl: 0.007064573783017123
          policy_loss: 0.08279361128807068
          total_loss: 0.7949174943897459
          vf_explained_var: 0.27736690640449524
          vf_loss: 0.7307172567066219
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,53,1014.06,53000,-53.9755,-38.2,-71.3,539.031


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-21_20-02-36
  done: false
  episode_len_mean: 536.86
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -53.757000000000474
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9631862057579887
          entropy_coeff: 0.009999999999999998
          kl: 0.010155212321287344
          policy_loss: -0.10001540647612678
          total_loss: 1.235871864358584
          vf_explained_var: 0.25032567977905273
          vf_loss: 1.353488099657827
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,54,1034.42,54000,-53.757,-38.2,-71.3,536.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-21_20-02-56
  done: false
  episode_len_mean: 536.94
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -53.73700000000048
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 102
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.983852708339691
          entropy_coeff: 0.009999999999999998
          kl: 0.0110010935307503
          policy_loss: -0.10356386833720738
          total_loss: 1.1836291511853536
          vf_explained_var: 0.1822572648525238
          vf_loss: 1.3048313234415319
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,55,1053.92,55000,-53.737,-38.2,-71.3,536.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-21_20-03-17
  done: false
  episode_len_mean: 535.86
  episode_media: {}
  episode_reward_max: -38.20000000000027
  episode_reward_mean: -53.62900000000047
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9212053524123298
          entropy_coeff: 0.009999999999999998
          kl: 0.008957734892105683
          policy_loss: 0.045720416804154715
          total_loss: 1.0568119919962353
          vf_explained_var: 0.1083906814455986
          vf_loss: 1.0285120743016403
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,56,1075.6,56000,-53.629,-38.2,-71.3,535.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-21_20-03-39
  done: false
  episode_len_mean: 533.57
  episode_media: {}
  episode_reward_max: -37.30000000000026
  episode_reward_mean: -53.40000000000045
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 108
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9085089246431985
          entropy_coeff: 0.009999999999999998
          kl: 0.010582720441477731
          policy_loss: 0.07803182237678104
          total_loss: 0.8297398742702272
          vf_explained_var: 0.01573053188621998
          vf_loss: 0.7686766100426515
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,57,1097.15,57000,-53.4,-37.3,-71.3,533.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-21_20-03-57
  done: false
  episode_len_mean: 532.39
  episode_media: {}
  episode_reward_max: -37.30000000000026
  episode_reward_mean: -53.23900000000045
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.898706967300839
          entropy_coeff: 0.009999999999999998
          kl: 0.007857961520818949
          policy_loss: 0.10356177356508044
          total_loss: 0.6917507449785868
          vf_explained_var: 0.1838500201702118
          vf_loss: 0.6056044490645743
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,58,1115.34,58000,-53.239,-37.3,-71.3,532.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-21_20-04-17
  done: false
  episode_len_mean: 531.05
  episode_media: {}
  episode_reward_max: -37.30000000000026
  episode_reward_mean: -53.10500000000045
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 112
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8179822405179342
          entropy_coeff: 0.009999999999999998
          kl: 0.012381312840118536
          policy_loss: -0.09968162775039673
          total_loss: 1.2858155584997601
          vf_explained_var: -0.11006749421358109
          vf_loss: 1.4012007363968426
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,59,1135.58,59000,-53.105,-37.3,-71.3,531.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-21_20-04-37
  done: false
  episode_len_mean: 529.36
  episode_media: {}
  episode_reward_max: -37.30000000000026
  episode_reward_mean: -52.93600000000046
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8076864255799188
          entropy_coeff: 0.009999999999999998
          kl: 0.008072986279476385
          policy_loss: 0.02988013012541665
          total_loss: 1.4004043062527975
          vf_explained_var: -0.06000395864248276
          vf_loss: 1.3869864510993162
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,60,1155.57,60000,-52.936,-37.3,-71.3,529.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-21_20-04-58
  done: false
  episode_len_mean: 527.22
  episode_media: {}
  episode_reward_max: -37.30000000000026
  episode_reward_mean: -52.72200000000046
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7508775764041478
          entropy_coeff: 0.009999999999999998
          kl: 0.007590484346509502
          policy_loss: 0.07194121529658636
          total_loss: 0.7856243315670225
          vf_explained_var: 0.08303176611661911
          vf_loss: 0.7296737895036737
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,61,1176.36,61000,-52.722,-37.3,-71.3,527.22




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-21_20-05-36
  done: false
  episode_len_mean: 523.75
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -52.375000000000455
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6657678008079528
          entropy_coeff: 0.009999999999999998
          kl: 0.0059148094867686775
          policy_loss: 0.04728805265492863
          total_loss: 1.408257582783699
          vf_explained_var: -0.2645685076713562
          vf_loss: 1.3764442555606364
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,62,1213.84,62000,-52.375,-34.5,-71.3,523.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-21_20-06-00
  done: false
  episode_len_mean: 520.96
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -52.09600000000044
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 122
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6705612977345785
          entropy_coeff: 0.009999999999999998
          kl: 0.005861646057084771
          policy_loss: -0.08882160683472952
          total_loss: 1.2276185893350178
          vf_explained_var: -0.08561834692955017
          vf_loss: 1.3319734774529934
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,63,1238.03,63000,-52.096,-34.5,-71.3,520.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-21_20-06-20
  done: false
  episode_len_mean: 516.97
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -51.69700000000044
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6801171938578288
          entropy_coeff: 0.009999999999999998
          kl: 0.010083238904162488
          policy_loss: 0.04847638284166654
          total_loss: 1.3723484807544284
          vf_explained_var: 0.04802031069993973
          vf_loss: 1.3386566274695926
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,64,1258.33,64000,-51.697,-34.5,-71.3,516.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-21_20-06-40
  done: false
  episode_len_mean: 514.76
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -51.47600000000043
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 127
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7083257132106358
          entropy_coeff: 0.009999999999999998
          kl: 0.005476760003341259
          policy_loss: 0.0314515577422248
          total_loss: 0.7192830592393875
          vf_explained_var: -0.10662764310836792
          vf_loss: 0.7038194143937694
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,65,1277.71,65000,-51.476,-34.5,-71.3,514.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-21_20-06-59
  done: false
  episode_len_mean: 513.21
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -51.321000000000424
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 129
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.674438390466902
          entropy_coeff: 0.009999999999999998
          kl: 0.007152535839873507
          policy_loss: -0.08483102718989054
          total_loss: 1.276158294412825
          vf_explained_var: 0.05601485073566437
          vf_loss: 1.3763032024933233
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,66,1297.13,66000,-51.321,-34.5,-71.3,513.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-21_20-07-18
  done: false
  episode_len_mean: 512.33
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -51.23300000000042
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 131
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6024838195906745
          entropy_coeff: 0.009999999999999998
          kl: 0.006931048782175214
          policy_loss: -0.09226394361919826
          total_loss: 1.2482628908422257
          vf_explained_var: 0.1289328932762146
          vf_loss: 1.35516546898418
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,67,1315.68,67000,-51.233,-34.5,-71.3,512.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-21_20-07-38
  done: false
  episode_len_mean: 510.99
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -51.09900000000042
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 133
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5624709632661609
          entropy_coeff: 0.009999999999999998
          kl: 0.010855007304364254
          policy_loss: -0.0970137294795778
          total_loss: 1.211395979921023
          vf_explained_var: 0.06744308024644852
          vf_loss: 1.321863421301047
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,68,1335.62,68000,-51.099,-34.5,-71.3,510.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-21_20-07-56
  done: false
  episode_len_mean: 507.52
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.75200000000043
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 3
  episodes_total: 136
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5076585160361395
          entropy_coeff: 0.009999999999999998
          kl: 0.0032427409587296434
          policy_loss: 0.058127092238929534
          total_loss: 1.180280895034472
          vf_explained_var: -0.15554292500019073
          vf_loss: 1.136581830638978
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,69,1353.64,69000,-50.752,-34.5,-71.3,507.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-21_20-08-13
  done: false
  episode_len_mean: 506.05
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.605000000000416
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 138
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4359052485889858
          entropy_coeff: 0.009999999999999998
          kl: 0.009270099098979282
          policy_loss: 0.07848918735980988
          total_loss: 0.7822591038213835
          vf_explained_var: 0.23300321400165558
          vf_loss: 0.717201964567519
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,70,1371.24,70000,-50.605,-34.5,-71.3,506.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-21_20-08-30
  done: false
  episode_len_mean: 505.67
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.56700000000043
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4176006542311774
          entropy_coeff: 0.009999999999999998
          kl: 0.005753100287751936
          policy_loss: -0.06596269458532333
          total_loss: 0.665075047314167
          vf_explained_var: 0.19103026390075684
          vf_loss: 0.744638425256643
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,71,1387.56,71000,-50.567,-34.5,-71.3,505.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-21_20-08-46
  done: false
  episode_len_mean: 504.17
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.417000000000414
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 141
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4691184163093567
          entropy_coeff: 0.009999999999999998
          kl: 0.011443112349801983
          policy_loss: -0.07386333288417922
          total_loss: 1.2205045183499654
          vf_explained_var: -0.05336112901568413
          vf_loss: 1.3079147135424944
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,72,1404.3,72000,-50.417,-34.5,-71.3,504.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-21_20-09-04
  done: false
  episode_len_mean: 502.16
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.21600000000041
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 143
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.379048392507765
          entropy_coeff: 0.009999999999999998
          kl: 0.009969281371688036
          policy_loss: -0.08619094540675482
          total_loss: 1.2965459277232487
          vf_explained_var: -0.0015092802932485938
          vf_loss: 1.3955304314692816
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,73,1422.22,73000,-50.216,-34.5,-71.3,502.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-21_20-09-20
  done: false
  episode_len_mean: 499.92
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.99200000000042
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 145
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2884930226537916
          entropy_coeff: 0.009999999999999998
          kl: 0.007433618079527794
          policy_loss: -0.0754166150258647
          total_loss: 1.1888934714926613
          vf_explained_var: 0.05931603163480759
          vf_loss: 1.2764516769184007
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,74,1438.13,74000,-49.992,-34.5,-71.3,499.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-21_20-09-36
  done: false
  episode_len_mean: 499.72
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.97200000000043
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 147
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1757288641399808
          entropy_coeff: 0.009999999999999998
          kl: 0.008470754359260467
          policy_loss: -0.08908098604944018
          total_loss: 1.2400290346807903
          vf_explained_var: 0.027566319331526756
          vf_loss: 1.3400202496184244
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,75,1454.03,75000,-49.972,-34.5,-71.3,499.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-21_20-09-51
  done: false
  episode_len_mean: 500.51
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.05100000000042
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 149
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.264886694484287
          entropy_coeff: 0.009999999999999998
          kl: 0.007116146750134888
          policy_loss: 0.10211476965083016
          total_loss: 0.7904486964146297
          vf_explained_var: -0.01233174279332161
          vf_loss: 0.7002711776333551
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,76,1469.05,76000,-50.051,-34.5,-71.3,500.51




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-21_20-10-25
  done: false
  episode_len_mean: 499.91
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.99100000000041
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 151
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.163686842388577
          entropy_coeff: 0.009999999999999998
          kl: 0.0070167584826684405
          policy_loss: 0.09844436777962579
          total_loss: 0.8194345858361985
          vf_explained_var: -0.04660458490252495
          vf_loss: 0.7319254189212289
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,77,1502.28,77000,-49.991,-34.5,-71.3,499.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-21_20-10-40
  done: false
  episode_len_mean: 500.3
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -50.03000000000043
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.243829650349087
          entropy_coeff: 0.009999999999999998
          kl: 0.00766716981263552
          policy_loss: -0.05937171942657894
          total_loss: 0.6814405648244752
          vf_explained_var: -0.1990266740322113
          vf_loss: 0.7524838690749474
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,78,1517.94,78000,-50.03,-34.5,-71.3,500.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-21_20-10-56
  done: false
  episode_len_mean: 498.09
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.809000000000424
  episode_reward_min: -71.30000000000022
  episodes_this_iter: 2
  episodes_total: 154
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2695814569791157
          entropy_coeff: 0.009999999999999998
          kl: 0.008496687102902915
          policy_loss: -0.09221980902883742
          total_loss: 1.3309646166033215
          vf_explained_var: 0.03273554518818855
          vf_loss: 1.435030573937628
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,79,1533.96,79000,-49.809,-34.5,-71.3,498.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-21_20-11-13
  done: false
  episode_len_mean: 494.87
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.487000000000414
  episode_reward_min: -67.90000000000042
  episodes_this_iter: 2
  episodes_total: 156
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3277724650171068
          entropy_coeff: 0.009999999999999998
          kl: 0.008711770399201452
          policy_loss: -0.07550564325518078
          total_loss: 1.3053256630897523
          vf_explained_var: 0.10051143169403076
          vf_loss: 1.3932378460135726
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,80,1550.84,80000,-49.487,-34.5,-67.9,494.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-21_20-11-30
  done: false
  episode_len_mean: 492.17
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -49.21700000000042
  episode_reward_min: -67.90000000000042
  episodes_this_iter: 2
  episodes_total: 158
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.361590137746599
          entropy_coeff: 0.009999999999999998
          kl: 0.008573143017805762
          policy_loss: 0.06968118713961707
          total_loss: 0.7987078567345937
          vf_explained_var: 0.01802639290690422
          vf_loss: 0.7417852655053139
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,81,1567.63,81000,-49.217,-34.5,-67.9,492.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-21_20-11-48
  done: false
  episode_len_mean: 489.14
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -48.914000000000414
  episode_reward_min: -66.5000000000005
  episodes_this_iter: 2
  episodes_total: 160
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3379177464379204
          entropy_coeff: 0.009999999999999998
          kl: 0.006378017472088052
          policy_loss: 0.009016238732470407
          total_loss: 0.8669352034727732
          vf_explained_var: -0.1893262416124344
          vf_loss: 0.87066033275591
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,82,1585.97,82000,-48.914,-34.5,-66.5,489.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-21_20-12-07
  done: false
  episode_len_mean: 485.85
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -48.585000000000406
  episode_reward_min: -66.5000000000005
  episodes_this_iter: 2
  episodes_total: 162
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2187932756212023
          entropy_coeff: 0.009999999999999998
          kl: 0.010263836901249235
          policy_loss: -0.07598128451241387
          total_loss: 1.2902171942922804
          vf_explained_var: 0.08976206183433533
          vf_loss: 1.3773600153625012
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,83,1604.33,83000,-48.585,-34.5,-66.5,485.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-21_20-12-24
  done: false
  episode_len_mean: 482.88
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -48.28800000000042
  episode_reward_min: -66.5000000000005
  episodes_this_iter: 2
  episodes_total: 164
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1422158545917935
          entropy_coeff: 0.009999999999999998
          kl: 0.01013135814297183
          policy_loss: -0.07115373760461807
          total_loss: 1.2620725095272065
          vf_explained_var: -0.16927097737789154
          vf_loss: 1.343635257333517
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,84,1621.7,84000,-48.288,-34.5,-66.5,482.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-21_20-12-41
  done: false
  episode_len_mean: 479.91
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -47.99100000000041
  episode_reward_min: -63.700000000000635
  episodes_this_iter: 2
  episodes_total: 166
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1304668876859876
          entropy_coeff: 0.009999999999999998
          kl: 0.006278978657145255
          policy_loss: -0.07283059722847408
          total_loss: 1.262041492594613
          vf_explained_var: -0.11575774848461151
          vf_loss: 1.34554885327816
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,85,1638.91,85000,-47.991,-34.5,-63.7,479.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-21_20-12-59
  done: false
  episode_len_mean: 476.82
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -47.682000000000414
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 168
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1773344212108188
          entropy_coeff: 0.009999999999999998
          kl: 0.009210511000388305
          policy_loss: -0.08770541350046794
          total_loss: 1.2557606236802208
          vf_explained_var: -0.0029758114833384752
          vf_loss: 1.3543183253043227
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,86,1656.65,86000,-47.682,-34.5,-62,476.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-21_20-13-18
  done: false
  episode_len_mean: 475.1
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -47.5100000000004
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 170
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1536328792572021
          entropy_coeff: 0.009999999999999998
          kl: 0.006980959340582831
          policy_loss: -0.08888820409774781
          total_loss: 1.2558137512869305
          vf_explained_var: 0.04201749339699745
          vf_loss: 1.3555401744114028
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,87,1675.08,87000,-47.51,-34.5,-62,475.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-21_20-13-37
  done: false
  episode_len_mean: 471.49
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -47.1490000000004
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 173
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1997090525097318
          entropy_coeff: 0.009999999999999998
          kl: 0.012985115653686484
          policy_loss: 0.03381174074278937
          total_loss: 1.3521086166302363
          vf_explained_var: 0.034576114267110825
          vf_loss: 1.3289954577883085
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,88,1694.66,88000,-47.149,-34.5,-62,471.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-21_20-13-58
  done: false
  episode_len_mean: 468.43
  episode_media: {}
  episode_reward_max: -34.50000000000022
  episode_reward_mean: -46.843000000000394
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 175
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2118241959147984
          entropy_coeff: 0.009999999999999998
          kl: 0.009944467665966922
          policy_loss: 0.008135565039184358
          total_loss: 0.8163942509227329
          vf_explained_var: 0.09164069592952728
          vf_loss: 0.8193824871132771
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,89,1715.92,89000,-46.843,-34.5,-62,468.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-21_20-14-23
  done: false
  episode_len_mean: 463.75
  episode_media: {}
  episode_reward_max: -33.500000000000206
  episode_reward_mean: -46.37500000000037
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 178
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1855054405000476
          entropy_coeff: 0.009999999999999998
          kl: 0.00683990811532177
          policy_loss: -0.002375236319171058
          total_loss: 1.2851615455415515
          vf_explained_var: 0.007669944316148758
          vf_loss: 1.2987078527609508
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,90,1740.44,90000,-46.375,-33.5,-62,463.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-21_20-15-05
  done: false
  episode_len_mean: 458.05
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -45.80500000000036
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1908524817890591
          entropy_coeff: 0.009999999999999998
          kl: 0.007022921646521358
          policy_loss: -0.0017514729665385352
          total_loss: 1.2948674708604813
          vf_explained_var: 0.0397043377161026
          vf_loss: 1.3078251630067825
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,91,1782.45,91000,-45.805,-29.2,-62,458.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-21_20-15-27
  done: false
  episode_len_mean: 454.54
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -45.45400000000037
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2459976302252875
          entropy_coeff: 0.009999999999999998
          kl: 0.012326672571313481
          policy_loss: 0.042204436245891784
          total_loss: 1.308274026049508
          vf_explained_var: 0.06466283649206161
          vf_loss: 1.2772968981001112
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,92,1804.87,92000,-45.454,-29.2,-62,454.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-21_20-15-49
  done: false
  episode_len_mean: 452.69
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -45.26900000000037
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 186
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2223554478751288
          entropy_coeff: 0.009999999999999998
          kl: 0.005030597515453729
          policy_loss: -0.09363313880231645
          total_loss: 1.1305521458387375
          vf_explained_var: 0.08354948461055756
          vf_loss: 1.235905791984664
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,93,1825.94,93000,-45.269,-29.2,-62,452.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-21_20-16-10
  done: false
  episode_len_mean: 450.11
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -45.01100000000038
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 189
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2142272498872546
          entropy_coeff: 0.009999999999999998
          kl: 0.0065010967170332085
          policy_loss: 0.05282855712705188
          total_loss: 1.2169653607739344
          vf_explained_var: 0.0990230143070221
          vf_loss: 1.1756289727985858
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,94,1847.21,94000,-45.011,-29.2,-62,450.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-21_20-16-32
  done: false
  episode_len_mean: 448.51
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -44.85100000000037
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 192
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.228338118394216
          entropy_coeff: 0.009999999999999998
          kl: 0.0052855156488453205
          policy_loss: 0.04023369732830259
          total_loss: 1.243362898959054
          vf_explained_var: 0.08867328613996506
          vf_loss: 1.2148840360343456
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,95,1869.82,95000,-44.851,-29.2,-62,448.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-21_20-16-56
  done: false
  episode_len_mean: 446.59
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -44.65900000000036
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 194
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2351696915096706
          entropy_coeff: 0.009999999999999998
          kl: 0.009594283532783117
          policy_loss: -0.0932990958293279
          total_loss: 1.1028337942229376
          vf_explained_var: -0.2825847566127777
          vf_loss: 1.2075251531269815
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,96,1893.13,96000,-44.659,-29.2,-62,446.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-21_20-17-20
  done: false
  episode_len_mean: 443.4
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -44.34000000000035
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 197
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.18142884572347
          entropy_coeff: 0.009999999999999998
          kl: 0.0022048264940192296
          policy_loss: -0.102868938114908
          total_loss: 1.610870095094045
          vf_explained_var: 0.03916458785533905
          vf_loss: 1.725332838959164
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,97,1917.7,97000,-44.34,-29.2,-62,443.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-21_20-17-44
  done: false
  episode_len_mean: 440.55
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -44.055000000000355
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 200
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1707713060908846
          entropy_coeff: 0.009999999999999998
          kl: 0.009660441399604914
          policy_loss: -0.09056793997685114
          total_loss: 1.3301885022057427
          vf_explained_var: 0.043380074203014374
          vf_loss: 1.4319811476601494
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,98,1941.72,98000,-44.055,-29.2,-62,440.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-21_20-18-10
  done: false
  episode_len_mean: 437.09
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.70900000000035
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 203
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1612811989254421
          entropy_coeff: 0.009999999999999998
          kl: 0.004855022627888639
          policy_loss: -0.10899448634849654
          total_loss: 1.4360965092976887
          vf_explained_var: 0.1175180971622467
          vf_loss: 1.5564610600471496
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,99,1967.17,99000,-43.709,-29.2,-62,437.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-21_20-18-36
  done: false
  episode_len_mean: 434.22
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.42200000000034
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 4
  episodes_total: 207
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638174613316855
          entropy_coeff: 0.009999999999999998
          kl: 0.014318111811012092
          policy_loss: -0.0024678531620237563
          total_loss: 1.4335359573364257
          vf_explained_var: 0.21519339084625244
          vf_loss: 1.4482840405570137
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,100,1992.92,100000,-43.422,-29.2,-62,434.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-21_20-18-58
  done: false
  episode_len_mean: 433.1
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.310000000000336
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 209
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3172655238045587
          entropy_coeff: 0.009999999999999998
          kl: 0.020130310714178833
          policy_loss: -0.0746696690718333
          total_loss: 1.0843035984370444
          vf_explained_var: 0.2668769061565399
          vf_loss: 1.171642648966776
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,101,2015.53,101000,-43.31,-29.2,-62,433.1




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-21_20-19-39
  done: false
  episode_len_mean: 431.06
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.10600000000035
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 212
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.310812246799469
          entropy_coeff: 0.009999999999999998
          kl: 0.015342037545550052
          policy_loss: -0.0028339165780279373
          total_loss: 1.0251452853282292
          vf_explained_var: 0.26508840918540955
          vf_loss: 1.0405119999622305
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,102,2055.68,102000,-43.106,-29.2,-62,431.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-21_20-19-58
  done: false
  episode_len_mean: 430.38
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.038000000000345
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 214
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3777512669563294
          entropy_coeff: 0.009999999999999998
          kl: 0.023304676446851344
          policy_loss: -0.06653834730386735
          total_loss: 0.9999101147055626
          vf_explained_var: 0.1392296999692917
          vf_loss: 1.0793520473978586
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,103,2075.41,103000,-43.038,-29.2,-62,430.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-21_20-20-19
  done: false
  episode_len_mean: 429.56
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -42.95600000000033
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05625000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.348219092686971
          entropy_coeff: 0.009999999999999998
          kl: 0.0038165884076959322
          policy_loss: -0.07897206296523412
          total_loss: 1.1902282655239105
          vf_explained_var: -0.0005071123596280813
          vf_loss: 1.282467828856574
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,104,2096.55,104000,-42.956,-29.2,-62,429.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-21_20-20-40
  done: false
  episode_len_mean: 430.03
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.00300000000033
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 219
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.299804937839508
          entropy_coeff: 0.009999999999999998
          kl: 0.005433987813303408
          policy_loss: -0.0782616095410453
          total_loss: 1.1212807161940468
          vf_explained_var: -0.19230329990386963
          vf_loss: 1.2123875476419925
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,105,2116.9,105000,-43.003,-29.2,-62,430.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-21_20-21-01
  done: false
  episode_len_mean: 430.95
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -43.09500000000035
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 222
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2729204297065735
          entropy_coeff: 0.009999999999999998
          kl: 0.013884886720401666
          policy_loss: 0.040779657165209454
          total_loss: 1.1423570815059874
          vf_explained_var: -0.1335977464914322
          vf_loss: 1.113916098409229
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,106,2137.97,106000,-43.095,-29.2,-62,430.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-21_20-21-25
  done: false
  episode_len_mean: 429.13
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -42.91300000000034
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 225
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3255087044503955
          entropy_coeff: 0.009999999999999998
          kl: 0.0030574779411331773
          policy_loss: 0.033729716307587095
          total_loss: 1.2989713754918841
          vf_explained_var: 0.1597297042608261
          vf_loss: 1.278410756587982
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,107,2162.29,107000,-42.913,-29.2,-62,429.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-21_20-21-48
  done: false
  episode_len_mean: 426.29
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -42.62900000000033
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 228
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.2708593712912666
          entropy_coeff: 0.009999999999999998
          kl: 0.009112283952466486
          policy_loss: 0.04719170232613881
          total_loss: 1.298459471265475
          vf_explained_var: -0.26952052116394043
          vf_loss: 1.2638482161694102
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,108,2184.99,108000,-42.629,-29.2,-62,426.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-21_20-22-12
  done: false
  episode_len_mean: 422.34
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -42.234000000000336
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 231
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.1541667620340983
          entropy_coeff: 0.009999999999999998
          kl: 0.01284413405479654
          policy_loss: 0.046422833369837865
          total_loss: 1.2180969645579656
          vf_explained_var: -0.06508941203355789
          vf_loss: 1.1830351760817899
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,109,2209.33,109000,-42.234,-29.2,-62,422.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-21_20-22-39
  done: false
  episode_len_mean: 418.27
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -41.827000000000325
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 234
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.1620493027899
          entropy_coeff: 0.009999999999999998
          kl: 0.004827830780552291
          policy_loss: 0.05835212121407191
          total_loss: 1.2420766956276363
          vf_explained_var: 0.23089705407619476
          vf_loss: 1.1952771845791075
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,110,2235.74,110000,-41.827,-29.2,-62,418.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-21_20-23-06
  done: false
  episode_len_mean: 412.76
  episode_media: {}
  episode_reward_max: -29.200000000000145
  episode_reward_mean: -41.276000000000316
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 237
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.1971209009488424
          entropy_coeff: 0.009999999999999998
          kl: 0.006749752527548757
          policy_loss: -0.08809399542709191
          total_loss: 1.6451809101634556
          vf_explained_var: 0.09318457543849945
          vf_loss: 1.7451986326111688
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,111,2262.63,111000,-41.276,-29.2,-62,412.76




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-21_20-23-49
  done: false
  episode_len_mean: 403.64
  episode_media: {}
  episode_reward_max: -28.90000000000014
  episode_reward_mean: -40.36400000000031
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 4
  episodes_total: 241
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.2007395413186814
          entropy_coeff: 0.009999999999999998
          kl: 0.009672754874377097
          policy_loss: -0.01533144898712635
          total_loss: 1.6205034123526678
          vf_explained_var: 0.12501470744609833
          vf_loss: 1.6477742499775356
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,112,2305.5,112000,-40.364,-28.9,-62,403.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-21_20-24-14
  done: false
  episode_len_mean: 398.21
  episode_media: {}
  episode_reward_max: -28.90000000000014
  episode_reward_mean: -39.821000000000296
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 244
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.0843566801812914
          entropy_coeff: 0.009999999999999998
          kl: 0.014361024853706302
          policy_loss: 0.0541085304485427
          total_loss: 1.131409059299363
          vf_explained_var: 0.2594636380672455
          vf_loss: 1.0880431202550729
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,113,2330.82,113000,-39.821,-28.9,-62,398.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-21_20-24-42
  done: false
  episode_len_mean: 390.81
  episode_media: {}
  episode_reward_max: -27.600000000000122
  episode_reward_mean: -39.08100000000029
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 247
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.07573092646069
          entropy_coeff: 0.009999999999999998
          kl: 0.002374798481419344
          policy_loss: -0.006177976230780284
          total_loss: 1.1826015257173115
          vf_explained_var: 0.1632786989212036
          vf_loss: 1.199520124329461
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,114,2359.25,114000,-39.081,-27.6,-62,390.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-21_20-25-10
  done: false
  episode_len_mean: 379.33
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -37.93300000000027
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 4
  episodes_total: 251
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0825565430853101
          entropy_coeff: 0.009999999999999998
          kl: 0.0057389335333917265
          policy_loss: 0.008343981703122456
          total_loss: 1.6440049979421827
          vf_explained_var: 0.05619650334119797
          vf_loss: 1.6464664114846124
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,115,2387.16,115000,-37.933,-27,-62,379.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-21_20-25-36
  done: false
  episode_len_mean: 370.71
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -37.071000000000254
  episode_reward_min: -53.60000000000049
  episodes_this_iter: 3
  episodes_total: 254
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0133016453848944
          entropy_coeff: 0.009999999999999998
          kl: 0.009197237915338302
          policy_loss: 0.008213980827066633
          total_loss: 1.1654653168386884
          vf_explained_var: 0.02560029737651348
          vf_loss: 1.1673520137866338
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,116,2412.76,116000,-37.071,-27,-53.6,370.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-21_20-26-05
  done: false
  episode_len_mean: 361.2
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -36.120000000000246
  episode_reward_min: -52.20000000000047
  episodes_this_iter: 4
  episodes_total: 258
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0258033262358772
          entropy_coeff: 0.009999999999999998
          kl: 0.006170190726935627
          policy_loss: 0.013413311044375101
          total_loss: 1.6101161082585653
          vf_explained_var: 0.054018307477235794
          vf_loss: 1.6069391316837736
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,117,2441.78,117000,-36.12,-26.8,-52.2,361.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-21_20-26-34
  done: false
  episode_len_mean: 354.98
  episode_media: {}
  episode_reward_max: -26.500000000000107
  episode_reward_mean: -35.49800000000024
  episode_reward_min: -52.20000000000047
  episodes_this_iter: 3
  episodes_total: 261
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 0.995679748058319
          entropy_coeff: 0.009999999999999998
          kl: 0.005466436242413477
          policy_loss: -0.10109240230586794
          total_loss: 1.2368830601374308
          vf_explained_var: 0.15424205362796783
          vf_loss: 1.347913044028812
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,118,2471.08,118000,-35.498,-26.5,-52.2,354.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-21_20-27-04
  done: false
  episode_len_mean: 346.08
  episode_media: {}
  episode_reward_max: -26.500000000000107
  episode_reward_mean: -34.608000000000224
  episode_reward_min: -52.20000000000047
  episodes_this_iter: 4
  episodes_total: 265
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 0.9106775893105401
          entropy_coeff: 0.009999999999999998
          kl: 0.007923031761091793
          policy_loss: 0.007218541701634725
          total_loss: 1.2003734972741869
          vf_explained_var: 0.2689778506755829
          vf_loss: 1.202233879433738
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,119,2500.59,119000,-34.608,-26.5,-52.2,346.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-21_20-27-35
  done: false
  episode_len_mean: 336.68
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -33.668000000000205
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 4
  episodes_total: 269
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 0.8876043955485026
          entropy_coeff: 0.009999999999999998
          kl: 0.019772556185474206
          policy_loss: 0.027714814411269294
          total_loss: 0.96375905474027
          vf_explained_var: 0.3930686116218567
          vf_loss: 0.9448507679833307
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,120,2531.33,120000,-33.668,-25.3,-48,336.68




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-21_20-28-23
  done: false
  episode_len_mean: 328.92
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.892000000000195
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 273
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 0.8264889478683471
          entropy_coeff: 0.009999999999999998
          kl: 0.007439826490349886
          policy_loss: 0.029211345563332238
          total_loss: 1.298874796099133
          vf_explained_var: 0.3843511939048767
          vf_loss: 1.2779021739959717
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,121,2580.2,121000,-32.892,-22.9,-42.5,328.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-21_20-28-55
  done: false
  episode_len_mean: 324.3
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.4300000000002
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 277
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 0.8716558727953169
          entropy_coeff: 0.009999999999999998
          kl: 0.006932984795955595
          policy_loss: 0.028621425852179527
          total_loss: 0.8496120866802004
          vf_explained_var: 0.6329618692398071
          vf_loss: 0.8296828505065706
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,122,2611.56,122000,-32.43,-22.9,-42.5,324.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-21_20-29-25
  done: false
  episode_len_mean: 321.74
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.174000000000184
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 281
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0210825562477113
          entropy_coeff: 0.009999999999999998
          kl: 0.014221679049875076
          policy_loss: 0.025806956821017794
          total_loss: 0.6333098606930838
          vf_explained_var: 0.7576674818992615
          vf_loss: 0.6176637308465109
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,123,2641.63,123000,-32.174,-22.9,-42.5,321.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-21_20-29-54
  done: false
  episode_len_mean: 319.1
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.91000000000018
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 284
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035156250000000014
          cur_lr: 5.000000000000001e-05
          entropy: 1.2816035641564263
          entropy_coeff: 0.009999999999999998
          kl: 0.024840928291038407
          policy_loss: 0.022390230993429818
          total_loss: 0.5787031524711185
          vf_explained_var: 0.8024687767028809
          vf_loss: 0.5690416286389033
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,124,2670.34,124000,-31.91,-22.9,-42.5,319.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-21_20-30-22
  done: false
  episode_len_mean: 314.43
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.44300000000017
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 288
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.142139622900221
          entropy_coeff: 0.009999999999999998
          kl: 0.009738235133245648
          policy_loss: -0.02592053109159072
          total_loss: 0.6900841206312179
          vf_explained_var: 0.7996207475662231
          vf_loss: 0.7273746924267874
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,125,2698.7,125000,-31.443,-22.9,-42.5,314.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-21_20-30-50
  done: false
  episode_len_mean: 311.44
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.144000000000172
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 291
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2889601932631598
          entropy_coeff: 0.009999999999999998
          kl: 0.01162267572428293
          policy_loss: 0.003666628193524149
          total_loss: 0.8725741196009847
          vf_explained_var: 0.6677355766296387
          vf_loss: 0.8817358023590511
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,126,2726.46,126000,-31.144,-22.9,-42.5,311.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-21_20-31-17
  done: false
  episode_len_mean: 309.15
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.91500000000017
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 295
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3818946639696756
          entropy_coeff: 0.009999999999999998
          kl: 0.011338431275501411
          policy_loss: 0.024211639414230983
          total_loss: 0.9032218986087375
          vf_explained_var: 0.7110071778297424
          vf_loss: 0.892769416835573
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,127,2753.36,127000,-30.915,-22.9,-42.5,309.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-21_20-31-44
  done: false
  episode_len_mean: 307.99
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.79900000000017
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 298
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5613745345009697
          entropy_coeff: 0.009999999999999998
          kl: 0.019914098841534805
          policy_loss: 0.07577849353353182
          total_loss: 0.45167145099904804
          vf_explained_var: 0.8685112595558167
          vf_loss: 0.39140168494648403
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,128,2780.44,128000,-30.799,-22.9,-42.5,307.99




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-21_20-32-28
  done: false
  episode_len_mean: 307.06
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.706000000000166
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 301
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7624483320448134
          entropy_coeff: 0.009999999999999998
          kl: 0.01180180056580211
          policy_loss: -0.04568116366863251
          total_loss: 0.8495294120576646
          vf_explained_var: 0.7066831588745117
          vf_loss: 0.9127728145983484
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,129,2824.28,129000,-30.706,-22.9,-42.5,307.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-21_20-32-54
  done: false
  episode_len_mean: 306.84
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.68400000000016
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 304
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6522764868206448
          entropy_coeff: 0.009999999999999998
          kl: 0.016155657979486825
          policy_loss: -0.06254565111464924
          total_loss: 1.3254256208737691
          vf_explained_var: 0.5089595913887024
          vf_loss: 1.4044088357024722
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,130,2850.69,130000,-30.684,-22.9,-42.5,306.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-21_20-33-20
  done: false
  episode_len_mean: 306.95
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.69500000000016
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 307
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7710215422842237
          entropy_coeff: 0.009999999999999998
          kl: 0.0241475720657177
          policy_loss: -0.10116358498732249
          total_loss: 0.5111266276902623
          vf_explained_var: 0.8227960467338562
          vf_loss: 0.6298730843596988
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,131,2876.22,131000,-30.695,-22.9,-42.5,306.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-21_20-33-45
  done: false
  episode_len_mean: 306.67
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.66700000000016
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 310
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00791015625
          cur_lr: 5.000000000000001e-05
          entropy: 1.86029794216156
          entropy_coeff: 0.009999999999999998
          kl: 0.038434604230494905
          policy_loss: -0.08930085044768121
          total_loss: 0.44656794137424893
          vf_explained_var: 0.853797972202301
          vf_loss: 0.5541677481598324
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,132,2901,132000,-30.667,-22.9,-42.5,306.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-21_20-34-11
  done: false
  episode_len_mean: 304.66
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.466000000000157
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 313
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011865234375000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.794412260585361
          entropy_coeff: 0.009999999999999998
          kl: 0.020602224705993502
          policy_loss: -0.08298947372370297
          total_loss: 0.5197391864740186
          vf_explained_var: 0.8895981907844543
          vf_loss: 0.6204283326864243
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,133,2927.86,133000,-30.466,-22.9,-42.5,304.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-21_20-34-39
  done: false
  episode_len_mean: 301.44
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.144000000000155
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 317
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.7028242296642728
          entropy_coeff: 0.009999999999999998
          kl: 0.01663833151550236
          policy_loss: -0.13603376779291365
          total_loss: 0.22174148013194403
          vf_explained_var: 0.9370612502098083
          vf_loss: 0.37450736280944613
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,134,2955.89,134000,-30.144,-22.9,-42.5,301.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-21_20-35-07
  done: false
  episode_len_mean: 297.63
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.763000000000158
  episode_reward_min: -39.9000000000003
  episodes_this_iter: 3
  episodes_total: 320
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.5805396411154005
          entropy_coeff: 0.009999999999999998
          kl: 0.015699069738388576
          policy_loss: 0.08492359005742603
          total_loss: 0.5371103392706977
          vf_explained_var: 0.9047027826309204
          vf_loss: 0.4677127348052131
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,135,2983.6,135000,-29.763,-22.9,-39.9,297.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-21_20-35-33
  done: false
  episode_len_mean: 295.74
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.574000000000154
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 323
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.7042273971769544
          entropy_coeff: 0.009999999999999998
          kl: 0.016169507132699264
          policy_loss: -0.11526707319749727
          total_loss: 1.1587903340657553
          vf_explained_var: 0.7000382542610168
          vf_loss: 1.2908118844032288
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,136,3009.11,136000,-29.574,-22.9,-38,295.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-21_20-35-55
  done: false
  episode_len_mean: 296.04
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.604000000000152
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 326
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.872448534435696
          entropy_coeff: 0.009999999999999998
          kl: 0.015361934194151022
          policy_loss: -0.017987858172920014
          total_loss: 0.6225540886322657
          vf_explained_var: 0.6770822405815125
          vf_loss: 0.658993027276463
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,137,3031.33,137000,-29.604,-22.9,-38,296.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-21_20-36-18
  done: false
  episode_len_mean: 295.89
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.58900000000015
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 329
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.8523219347000122
          entropy_coeff: 0.009999999999999998
          kl: 0.018865483166686285
          policy_loss: 0.07427542275852628
          total_loss: 0.9084850715266334
          vf_explained_var: 0.6785491704940796
          vf_loss: 0.8523970966537794
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,138,3054.01,138000,-29.589,-22.9,-37.9,295.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-21_20-36-59
  done: false
  episode_len_mean: 296.4
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.64000000000015
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 332
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.8513380620214674
          entropy_coeff: 0.009999999999999998
          kl: 0.009494611461292714
          policy_loss: 0.07222592929999033
          total_loss: 1.078275453713205
          vf_explained_var: 0.4732338488101959
          vf_loss: 1.024393925898605
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,139,3095.61,139000,-29.64,-22.9,-37.9,296.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-21_20-37-25
  done: false
  episode_len_mean: 297.39
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.73900000000015
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 335
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.8217438856760662
          entropy_coeff: 0.009999999999999998
          kl: 0.01023142697536288
          policy_loss: 0.10017560356193118
          total_loss: 0.916319109333886
          vf_explained_var: 0.5171593427658081
          vf_loss: 0.8341788532005416
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,140,3120.91,140000,-29.739,-22.9,-37.9,297.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-21_20-37-50
  done: false
  episode_len_mean: 298.33
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.833000000000162
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 338
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.852012242211236
          entropy_coeff: 0.009999999999999998
          kl: 0.009951771984038002
          policy_loss: 0.10935022334257762
          total_loss: 1.0167636629607943
          vf_explained_var: 0.4097521901130676
          vf_loss: 0.9257564321160316
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,141,3145.99,141000,-29.833,-22.9,-37.9,298.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-21_20-38-14
  done: false
  episode_len_mean: 299.7
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.97000000000016
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.8060229751798842
          entropy_coeff: 0.009999999999999998
          kl: 0.014326900258541957
          policy_loss: 0.0638227067887783
          total_loss: 1.0497055623266431
          vf_explained_var: 0.1457592248916626
          vf_loss: 1.0036880963378483
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,142,3170.69,142000,-29.97,-22.9,-37.9,299.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-21_20-38-38
  done: false
  episode_len_mean: 300.52
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.052000000000163
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 2
  episodes_total: 343
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.7140257199605307
          entropy_coeff: 0.009999999999999998
          kl: 0.012164715842688661
          policy_loss: -0.10098616315258874
          total_loss: 0.8551387445794212
          vf_explained_var: 0.3489418029785156
          vf_loss: 0.9730486477414767
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,143,3194.69,143000,-30.052,-22.9,-37.9,300.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-21_20-39-04
  done: false
  episode_len_mean: 301.57
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.157000000000156
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 347
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.5889264914724561
          entropy_coeff: 0.009999999999999998
          kl: 0.00953524756226132
          policy_loss: 0.021747458395030762
          total_loss: 1.4072185066011218
          vf_explained_var: 0.16163407266139984
          vf_loss: 1.401190612051222
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,144,3220.21,144000,-30.157,-22.9,-37.9,301.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-21_20-39-31
  done: false
  episode_len_mean: 302.47
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.247000000000163
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 350
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 1.6446651723649768
          entropy_coeff: 0.009999999999999998
          kl: 0.021601436517830228
          policy_loss: 0.06550277637110816
          total_loss: 1.2303133408228557
          vf_explained_var: -0.08627817034721375
          vf_loss: 1.1808727741241456
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,145,3246.74,145000,-30.247,-22.9,-37.9,302.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-21_20-39-57
  done: false
  episode_len_mean: 303.36
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.33600000000017
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 353
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.5637921863132054
          entropy_coeff: 0.009999999999999998
          kl: 0.014323518030299257
          policy_loss: 0.0578191833363639
          total_loss: 0.8562573273976644
          vf_explained_var: 0.07146638631820679
          vf_loss: 0.8136936823527018
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,146,3273.01,146000,-30.336,-22.9,-37.9,303.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-21_20-40-24
  done: false
  episode_len_mean: 303.41
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.34100000000017
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 356
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.4878372920884027
          entropy_coeff: 0.009999999999999998
          kl: 0.00950639550679229
          policy_loss: -0.10301844833625687
          total_loss: 1.4375516613324484
          vf_explained_var: 0.06863229721784592
          vf_loss: 1.5551946851942273
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,147,3300.39,147000,-30.341,-22.9,-37.9,303.41




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-21_20-41-09
  done: false
  episode_len_mean: 303.64
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.364000000000164
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 360
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.344636172718472
          entropy_coeff: 0.009999999999999998
          kl: 0.01483748329966114
          policy_loss: -0.00042876361144913566
          total_loss: 1.680101588037279
          vf_explained_var: 0.09966416656970978
          vf_loss: 1.6935805850558812
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,148,3344.81,148000,-30.364,-22.9,-37.9,303.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-21_20-41-39
  done: false
  episode_len_mean: 303.59
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.359000000000155
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 364
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2908740997314454
          entropy_coeff: 0.009999999999999998
          kl: 0.005456205149533054
          policy_loss: 0.017075619432661268
          total_loss: 1.5068569209840563
          vf_explained_var: 0.1839260309934616
          vf_loss: 1.5025443620151944
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,149,3374.95,149000,-30.359,-22.9,-37.9,303.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-21_20-42-09
  done: false
  episode_len_mean: 303.69
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -30.36900000000016
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 367
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1505555656221178
          entropy_coeff: 0.009999999999999998
          kl: 0.016963134318085257
          policy_loss: -0.10995953778425853
          total_loss: 0.9827497561772665
          vf_explained_var: 0.43386074900627136
          vf_loss: 1.103761973645952
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,150,3405.13,150000,-30.369,-22.9,-37.9,303.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-21_20-42-39
  done: false
  episode_len_mean: 304.47
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.447000000000163
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 371
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2438983347680834
          entropy_coeff: 0.009999999999999998
          kl: 0.013045330103096435
          policy_loss: -0.03623672020104196
          total_loss: 0.7998225669066111
          vf_explained_var: 0.5097808837890625
          vf_loss: 0.848150008254581
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,151,3434.73,151000,-30.447,-24.8,-37.9,304.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-21_20-43-09
  done: false
  episode_len_mean: 304.69
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.469000000000168
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 375
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2257187750604417
          entropy_coeff: 0.009999999999999998
          kl: 0.006019571435997366
          policy_loss: 0.043907481928666434
          total_loss: 0.9245525125000212
          vf_explained_var: 0.6101548671722412
          vf_loss: 0.8927415091130468
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,152,3465.42,152000,-30.469,-24.8,-37.9,304.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-21_20-43-41
  done: false
  episode_len_mean: 304.44
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.444000000000166
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 379
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.127552607986662
          entropy_coeff: 0.009999999999999998
          kl: 0.009290538321893157
          policy_loss: 0.029834593791100712
          total_loss: 1.126190372970369
          vf_explained_var: 0.4520528018474579
          vf_loss: 1.1073832876152463
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,153,3496.89,153000,-30.444,-24.8,-37.9,304.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-21_20-44-12
  done: false
  episode_len_mean: 303.96
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.396000000000164
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 383
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1480597820546892
          entropy_coeff: 0.009999999999999998
          kl: 0.020440968043877326
          policy_loss: -0.02140719604988893
          total_loss: 0.6557860768503613
          vf_explained_var: 0.776045560836792
          vf_loss: 0.688128164741728
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,154,3527.81,154000,-30.396,-24.8,-37.9,303.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-21_20-44-43
  done: false
  episode_len_mean: 302.98
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.298000000000158
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 387
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.9139150142669678
          entropy_coeff: 0.009999999999999998
          kl: 0.007737636204757514
          policy_loss: -0.04682200935979684
          total_loss: 0.3767133857640955
          vf_explained_var: 0.8809241652488708
          vf_loss: 0.4323646879858441
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,155,3559.35,155000,-30.298,-24.8,-37.9,302.98




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-21_20-45-32
  done: false
  episode_len_mean: 301.26
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -30.126000000000154
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 391
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.9615419679217868
          entropy_coeff: 0.009999999999999998
          kl: 0.018844344804644367
          policy_loss: -0.07196840047836303
          total_loss: 0.33485991433262824
          vf_explained_var: 0.8427918553352356
          vf_loss: 0.4156891115837627
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,156,3608.44,156000,-30.126,-22.2,-37.9,301.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-21_20-46-03
  done: false
  episode_len_mean: 299.72
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.972000000000158
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 395
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.9471124443742964
          entropy_coeff: 0.009999999999999998
          kl: 0.006417382996981837
          policy_loss: 0.01634473270840115
          total_loss: 0.649523514840338
          vf_explained_var: 0.8375279903411865
          vf_loss: 0.6423929111825095
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,157,3638.86,157000,-29.972,-22.2,-37.9,299.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-21_20-46-33
  done: false
  episode_len_mean: 297.57
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.757000000000154
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 399
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.9092835393216875
          entropy_coeff: 0.009999999999999998
          kl: 0.010208727842575469
          policy_loss: -0.04447726790482799
          total_loss: 0.19775542521642314
          vf_explained_var: 0.9316434264183044
          vf_loss: 0.2509167164564133
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,158,3669.25,158000,-29.757,-22.2,-37.9,297.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-21_20-47-04
  done: false
  episode_len_mean: 295.22
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.522000000000144
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 403
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.881749235259162
          entropy_coeff: 0.009999999999999998
          kl: 0.007832688715181746
          policy_loss: -0.0327750184883674
          total_loss: 0.4316386106941435
          vf_explained_var: 0.8508394956588745
          vf_loss: 0.4729174577527576
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,159,3700.23,159000,-29.522,-22.2,-37.9,295.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-21_20-47-34
  done: false
  episode_len_mean: 293.85
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.385000000000147
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 3
  episodes_total: 406
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.8459202799532148
          entropy_coeff: 0.009999999999999998
          kl: 0.0074901935751000796
          policy_loss: -0.024657395916680494
          total_loss: 0.7020784444279141
          vf_explained_var: 0.6100703477859497
          vf_loss: 0.7348950995339287
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,160,3730.04,160000,-29.385,-22.2,-37.9,293.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-21_20-48-07
  done: false
  episode_len_mean: 290.75
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.075000000000145
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 410
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04004516601562501
          cur_lr: 5.000000000000001e-05
          entropy: 0.7640062795745002
          entropy_coeff: 0.009999999999999998
          kl: 0.002842620053685252
          policy_loss: -0.024739543596903484
          total_loss: 0.7424822058942583
          vf_explained_var: 0.4279322028160095
          vf_loss: 0.7747479716936747
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,161,3762.8,161000,-29.075,-22.2,-37.9,290.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-21_20-48-39
  done: false
  episode_len_mean: 287.93
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -28.793000000000138
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 414
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020022583007812504
          cur_lr: 5.000000000000001e-05
          entropy: 0.7548562321397994
          entropy_coeff: 0.009999999999999998
          kl: 0.002492012356679475
          policy_loss: -0.0022492009732458327
          total_loss: 0.7117841402689616
          vf_explained_var: 0.4357367157936096
          vf_loss: 0.7215320097075568
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,162,3794.93,162000,-28.793,-22.2,-37.9,287.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-21_20-49-09
  done: false
  episode_len_mean: 286.75
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -28.67500000000014
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 418
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010011291503906252
          cur_lr: 5.000000000000001e-05
          entropy: 0.7228276279237535
          entropy_coeff: 0.009999999999999998
          kl: 0.004577058815517848
          policy_loss: 0.02944349286456903
          total_loss: 0.7761335240470039
          vf_explained_var: 0.5135079026222229
          vf_loss: 0.7538724879423777
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,163,3824.62,163000,-28.675,-22.2,-37.9,286.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-21_20-49-58
  done: false
  episode_len_mean: 284.81
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.481000000000137
  episode_reward_min: -37.90000000000027
  episodes_this_iter: 4
  episodes_total: 422
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7879797008302477
          entropy_coeff: 0.009999999999999998
          kl: 0.006233123846794481
          policy_loss: 0.0720266488691171
          total_loss: 0.7069452404975891
          vf_explained_var: 0.5055983066558838
          vf_loss: 0.6427671876218584
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,164,3873.65,164000,-28.481,-21.4,-37.9,284.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-21_20-50-27
  done: false
  episode_len_mean: 282.0
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.20000000000013
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 426
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.9314579897456698
          entropy_coeff: 0.009999999999999998
          kl: 0.01938715228782123
          policy_loss: 0.029023211987482176
          total_loss: 1.173813517888387
          vf_explained_var: 0.23185107111930847
          vf_loss: 1.1540078414811028
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,165,3902.52,165000,-28.2,-21.4,-37.1,282


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-21_20-50-59
  done: false
  episode_len_mean: 278.23
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.823000000000125
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 430
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.6618275668885972
          entropy_coeff: 0.009999999999999998
          kl: 0.006432625412464077
          policy_loss: -0.013253736495971679
          total_loss: 0.769233414861891
          vf_explained_var: 0.5302757024765015
          vf_loss: 0.7890732321474287
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,166,3934.55,166000,-27.823,-21.4,-37.1,278.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-21_20-51-27
  done: false
  episode_len_mean: 275.59
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.55900000000012
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 433
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.8215650293562148
          entropy_coeff: 0.009999999999999998
          kl: 0.01052832646938327
          policy_loss: -0.04890484346283807
          total_loss: 0.6434829867548413
          vf_explained_var: 0.5470050573348999
          vf_loss: 0.7005507762233416
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,167,3962.77,167000,-27.559,-21.4,-37.1,275.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-21_20-51-54
  done: false
  episode_len_mean: 273.3
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.330000000000123
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 437
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.9871489008267721
          entropy_coeff: 0.009999999999999998
          kl: 0.01819824704602466
          policy_loss: 0.015299135943253835
          total_loss: 1.0246570143434737
          vf_explained_var: 0.3306269645690918
          vf_loss: 1.019138279888365
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,168,3989.86,168000,-27.33,-21.4,-37.1,273.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-21_20-52-24
  done: false
  episode_len_mean: 270.52
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.052000000000113
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 441
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.8569641775555081
          entropy_coeff: 0.009999999999999998
          kl: 0.0021980687607681816
          policy_loss: -0.004963261882464091
          total_loss: 0.9636235998736488
          vf_explained_var: 0.4334704577922821
          vf_loss: 0.9771455069382985
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,169,4020.15,169000,-27.052,-21.4,-37.1,270.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-21_20-52-56
  done: false
  episode_len_mean: 267.89
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.78900000000011
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 3
  episodes_total: 444
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7940115650494893
          entropy_coeff: 0.009999999999999998
          kl: 0.0026674457818749095
          policy_loss: 0.029601474561625058
          total_loss: 0.6792488823334376
          vf_explained_var: 0.5669960379600525
          vf_loss: 0.6575808417465951
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,170,4051.46,170000,-26.789,-21.4,-33.1,267.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-21_20-53-27
  done: false
  episode_len_mean: 265.35
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.53500000000011
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 448
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012514114379882815
          cur_lr: 5.000000000000001e-05
          entropy: 0.6877498497565587
          entropy_coeff: 0.009999999999999998
          kl: 0.02411790086934293
          policy_loss: -0.0010743404428164164
          total_loss: 0.9128488341967265
          vf_explained_var: 0.3942398130893707
          vf_loss: 0.920770490831799
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,171,4083.02,171000,-26.535,-21.4,-33.1,265.35




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-21_20-54-14
  done: false
  episode_len_mean: 263.39
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.33900000000011
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 452
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.017064995235867
          entropy_coeff: 0.009999999999999998
          kl: 0.020179465321627926
          policy_loss: 0.05323636200692919
          total_loss: 1.1091514885425569
          vf_explained_var: 0.2513103485107422
          vf_loss: 1.0660478969415028
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,172,4129.88,172000,-26.339,-21.4,-31.7,263.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-21_20-54-43
  done: false
  episode_len_mean: 262.67
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.267000000000102
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 456
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0028156757354736335
          cur_lr: 5.000000000000001e-05
          entropy: 0.9330057488547431
          entropy_coeff: 0.009999999999999998
          kl: 0.006775213882591499
          policy_loss: 0.010923383881648381
          total_loss: 0.9116131557358635
          vf_explained_var: 0.33691415190696716
          vf_loss: 0.9100007408195072
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,173,4158.52,173000,-26.267,-21.4,-31.7,262.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-21_20-55-12
  done: false
  episode_len_mean: 262.26
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.226000000000102
  episode_reward_min: -30.20000000000016
  episodes_this_iter: 3
  episodes_total: 459
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0028156757354736335
          cur_lr: 5.000000000000001e-05
          entropy: 1.0455348584387036
          entropy_coeff: 0.009999999999999998
          kl: 0.015327724968124793
          policy_loss: 0.001702967948383755
          total_loss: 0.8525436679522197
          vf_explained_var: 0.15387853980064392
          vf_loss: 0.8612528973155551
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,174,4187.56,174000,-26.226,-21.4,-30.2,262.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-21_20-55-40
  done: false
  episode_len_mean: 263.08
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.308000000000103
  episode_reward_min: -30.20000000000016
  episodes_this_iter: 4
  episodes_total: 463
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0028156757354736335
          cur_lr: 5.000000000000001e-05
          entropy: 1.060361001888911
          entropy_coeff: 0.009999999999999998
          kl: 0.00680427349444841
          policy_loss: 0.004797399126821094
          total_loss: 1.3381534563170538
          vf_explained_var: 0.07557439059019089
          vf_loss: 1.3439405136638218
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,175,4215.18,175000,-26.308,-21.4,-30.2,263.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-21_20-56-07
  done: false
  episode_len_mean: 263.86
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.386000000000102
  episode_reward_min: -30.20000000000016
  episodes_this_iter: 3
  episodes_total: 466
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0028156757354736335
          cur_lr: 5.000000000000001e-05
          entropy: 1.1043853905465868
          entropy_coeff: 0.009999999999999998
          kl: 0.0211380203585978
          policy_loss: 0.03897610306739807
          total_loss: 0.9411506457461252
          vf_explained_var: 0.09254626929759979
          vf_loss: 0.9131588898185227
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,176,4242.7,176000,-26.386,-21.4,-30.2,263.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-21_20-56-33
  done: false
  episode_len_mean: 264.69
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.469000000000104
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 469
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00422351360321045
          cur_lr: 5.000000000000001e-05
          entropy: 0.9005283064312405
          entropy_coeff: 0.009999999999999998
          kl: 0.010511181933065297
          policy_loss: -0.10079198429981867
          total_loss: 1.2636253012551202
          vf_explained_var: 0.1251937299966812
          vf_loss: 1.3733781788084243
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,177,4268.24,177000,-26.469,-21.4,-32.7,264.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-21_20-56-59
  done: false
  episode_len_mean: 265.75
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.57500000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 473
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00422351360321045
          cur_lr: 5.000000000000001e-05
          entropy: 0.8656369090080261
          entropy_coeff: 0.009999999999999998
          kl: 0.006141656474028561
          policy_loss: 0.02808863619963328
          total_loss: 1.3897104051378038
          vf_explained_var: 0.09407363831996918
          vf_loss: 1.3702521827485827
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,178,4294.49,178000,-26.575,-21.4,-32.7,265.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-21_20-57-27
  done: false
  episode_len_mean: 266.97
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.697000000000113
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 476
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00422351360321045
          cur_lr: 5.000000000000001e-05
          entropy: 0.7348643408881294
          entropy_coeff: 0.009999999999999998
          kl: 0.004551392204567467
          policy_loss: 0.040823009775744545
          total_loss: 1.0762852198547788
          vf_explained_var: -0.013253687880933285
          vf_loss: 1.0427916341357761
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,179,4322.55,179000,-26.697,-21.4,-32.7,266.97




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-21_20-58-12
  done: false
  episode_len_mean: 268.16
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.816000000000113
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 480
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002111756801605225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7332213011052874
          entropy_coeff: 0.009999999999999998
          kl: 0.01064516500421304
          policy_loss: 0.021000791092713675
          total_loss: 1.4452324509620667
          vf_explained_var: 0.10980185121297836
          vf_loss: 1.431541383266449
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,180,4367.61,180000,-26.816,-21.4,-32.7,268.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-21_20-58-44
  done: false
  episode_len_mean: 268.5
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.850000000000115
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 483
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002111756801605225
          cur_lr: 5.000000000000001e-05
          entropy: 0.5993460595607758
          entropy_coeff: 0.009999999999999998
          kl: 0.004949053464448067
          policy_loss: -0.07071097816030185
          total_loss: 1.3297975765334236
          vf_explained_var: 0.10316721349954605
          vf_loss: 1.406491552458869
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,181,4399.41,181000,-26.85,-21.4,-32.7,268.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-21_20-59-14
  done: false
  episode_len_mean: 269.65
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.96500000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 487
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.5877439700894885
          entropy_coeff: 0.009999999999999998
          kl: 0.008538892295015425
          policy_loss: 0.027680585698948965
          total_loss: 1.4386320908864338
          vf_explained_var: 0.14988410472869873
          vf_loss: 1.4168199247784086
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,182,4429.45,182000,-26.965,-21.4,-32.7,269.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-21_20-59-43
  done: false
  episode_len_mean: 270.93
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.093000000000117
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 490
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7126424312591553
          entropy_coeff: 0.009999999999999998
          kl: 0.015401905312574056
          policy_loss: -0.08793624291817347
          total_loss: 1.4292903105417887
          vf_explained_var: 0.06887908279895782
          vf_loss: 1.5243367102411058
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,183,4458.55,183000,-27.093,-21.4,-32.7,270.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-21_21-00-11
  done: false
  episode_len_mean: 272.11
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.211000000000116
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 494
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.5829520020220015
          entropy_coeff: 0.009999999999999998
          kl: 0.012853724199054166
          policy_loss: 0.03401496170295609
          total_loss: 1.3626397530237833
          vf_explained_var: 0.17755164206027985
          vf_loss: 1.334440745247735
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,184,4486.03,184000,-27.211,-21.4,-32.7,272.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-21_21-00-41
  done: false
  episode_len_mean: 273.0
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.300000000000118
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 498
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.5599743300014072
          entropy_coeff: 0.009999999999999998
          kl: 0.015826931173104065
          policy_loss: 0.005917393250597848
          total_loss: 1.359510362148285
          vf_explained_var: 0.20289921760559082
          vf_loss: 1.3591760065820482
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,185,4515.79,185000,-27.3,-21.4,-32.7,273


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-21_21-01-08
  done: false
  episode_len_mean: 274.16
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.41600000000012
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 501
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.6348694304625193
          entropy_coeff: 0.009999999999999998
          kl: 0.018882814346785418
          policy_loss: 0.06348238839871354
          total_loss: 1.1370710637834338
          vf_explained_var: 0.16043804585933685
          vf_loss: 1.079917445116573
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,186,4543.48,186000,-27.416,-21.4,-32.7,274.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-21_21-01-36
  done: false
  episode_len_mean: 275.06
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.506000000000117
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 505
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.6017925533983443
          entropy_coeff: 0.009999999999999998
          kl: 0.008079671885626125
          policy_loss: -0.002647892799642351
          total_loss: 1.5499976303842333
          vf_explained_var: 0.06900345534086227
          vf_loss: 1.5586549348301357
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,187,4571.24,187000,-27.506,-21.4,-32.7,275.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-21_21-02-04
  done: false
  episode_len_mean: 275.62
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.562000000000115
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 508
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.5639403763744566
          entropy_coeff: 0.009999999999999998
          kl: 0.03593850003634584
          policy_loss: 0.015062489691707823
          total_loss: 0.9897222336795595
          vf_explained_var: 0.17654567956924438
          vf_loss: 0.9802612062957552
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,188,4598.86,188000,-27.562,-21.4,-32.7,275.62




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-21_21-02-50
  done: false
  episode_len_mean: 276.0
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.60000000000012
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 512
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 0.6265286246935526
          entropy_coeff: 0.009999999999999998
          kl: 0.019363244932029015
          policy_loss: 0.030033460838927162
          total_loss: 1.5665611028671265
          vf_explained_var: 0.0637979507446289
          vf_loss: 1.5427622424231635
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,189,4645.35,189000,-27.6,-21.4,-32.7,276


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-21_21-03-22
  done: false
  episode_len_mean: 276.24
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.62400000000012
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 516
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 0.4900130954053667
          entropy_coeff: 0.009999999999999998
          kl: 0.005665184500984507
          policy_loss: 0.015802342361874052
          total_loss: 1.517798278066847
          vf_explained_var: 0.09058792144060135
          vf_loss: 1.5068871259689331
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,190,4676.93,190000,-27.624,-21.4,-32.7,276.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-21_21-03-55
  done: false
  episode_len_mean: 275.71
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.57100000000012
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 520
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 0.33075398902098335
          entropy_coeff: 0.009999999999999998
          kl: 0.03306473233210502
          policy_loss: 0.04045899444156223
          total_loss: 1.183303388622072
          vf_explained_var: 0.34923043847084045
          vf_loss: 1.1460995654265085
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,191,4710.15,191000,-27.571,-23.2,-32.7,275.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-21_21-04-29
  done: false
  episode_len_mean: 274.37
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.437000000000115
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 524
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0023757264018058784
          cur_lr: 5.000000000000001e-05
          entropy: 0.4700515323215061
          entropy_coeff: 0.009999999999999998
          kl: 0.030443305993885082
          policy_loss: -0.00025695827272203235
          total_loss: 1.0150256858931648
          vf_explained_var: 0.5849782228469849
          vf_loss: 1.0199108322461445
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,192,4743.88,192000,-27.437,-23.2,-32.7,274.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-21_21-05-02
  done: false
  episode_len_mean: 273.71
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.371000000000116
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 528
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035635896027088167
          cur_lr: 5.000000000000001e-05
          entropy: 0.45998558004697165
          entropy_coeff: 0.009999999999999998
          kl: 0.012183676282012909
          policy_loss: 0.05497473080952962
          total_loss: 0.836394672261344
          vf_explained_var: 0.7843515276908875
          vf_loss: 0.7859763734870486
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,193,4776.83,193000,-27.371,-23.2,-32.7,273.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-21_21-05-34
  done: false
  episode_len_mean: 273.26
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.326000000000118
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 532
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035635896027088167
          cur_lr: 5.000000000000001e-05
          entropy: 0.516852907008595
          entropy_coeff: 0.009999999999999998
          kl: 0.011032264894486825
          policy_loss: 0.02085655393699805
          total_loss: 0.7377021862400903
          vf_explained_var: 0.7034786939620972
          vf_loss: 0.7219748516877492
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,194,4809.26,194000,-27.326,-23.2,-32.7,273.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-21_21-06-05
  done: false
  episode_len_mean: 272.75
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.27500000000012
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 536
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0035635896027088167
          cur_lr: 5.000000000000001e-05
          entropy: 0.6313737097713682
          entropy_coeff: 0.009999999999999998
          kl: 0.0047680817759720795
          policy_loss: 0.07763926444782152
          total_loss: 0.8839013781812456
          vf_explained_var: 0.6764504909515381
          vf_loss: 0.8125588648849064
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,195,4839.54,195000,-27.275,-23.2,-32.7,272.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-21_21-06-50
  done: false
  episode_len_mean: 272.35
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.23500000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 540
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017817948013544084
          cur_lr: 5.000000000000001e-05
          entropy: 0.8840104103088379
          entropy_coeff: 0.009999999999999998
          kl: 0.017954417185014292
          policy_loss: 0.003857628794180022
          total_loss: 1.1433318118254343
          vf_explained_var: 0.4988780915737152
          vf_loss: 1.1482822895050049
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,196,4884.91,196000,-27.235,-23.2,-32.7,272.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-21_21-07-22
  done: false
  episode_len_mean: 272.03
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.20300000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 544
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017817948013544084
          cur_lr: 5.000000000000001e-05
          entropy: 0.7599326809247334
          entropy_coeff: 0.009999999999999998
          kl: 0.03373165514279954
          policy_loss: -0.019118833873007033
          total_loss: 1.0622852232721116
          vf_explained_var: 0.4668944478034973
          vf_loss: 1.0889432675308652
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,197,4917.1,197000,-27.203,-23.2,-32.7,272.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-21_21-07-54
  done: false
  episode_len_mean: 271.93
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.193000000000115
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 547
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026726922020316123
          cur_lr: 5.000000000000001e-05
          entropy: 0.5761173533068763
          entropy_coeff: 0.009999999999999998
          kl: 0.004049155699563626
          policy_loss: -0.1733350063363711
          total_loss: 0.5092951350741917
          vf_explained_var: 0.6832531690597534
          vf_loss: 0.6883804943826464
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,198,4948.77,198000,-27.193,-23.2,-32.7,271.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-21_21-08-23
  done: false
  episode_len_mean: 272.88
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -27.288000000000114
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 551
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 0.7597520450750986
          entropy_coeff: 0.009999999999999998
          kl: 0.020592545321556858
          policy_loss: 0.018512424247132408
          total_loss: 1.2401916013823615
          vf_explained_var: 0.34212037920951843
          vf_loss: 1.2292491767141553
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,199,4977.68,199000,-27.288,-23.9,-32.7,272.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-21_21-08-48
  done: false
  episode_len_mean: 275.15
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -27.51500000000012
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 2
  episodes_total: 553
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 0.944033043914371
          entropy_coeff: 0.009999999999999998
          kl: 0.011002314915674482
          policy_loss: -0.08259742624229854
          total_loss: 0.9296575910515256
          vf_explained_var: -0.030084429308772087
          vf_loss: 1.0216732988754909
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,200,5002.72,200000,-27.515,-23.9,-41,275.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-21_21-09-08
  done: false
  episode_len_mean: 279.36
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -27.936000000000128
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 556
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 0.9029397825400035
          entropy_coeff: 0.009999999999999998
          kl: 0.00937573887377445
          policy_loss: 0.06289704235063659
          total_loss: 1.183012412654029
          vf_explained_var: -0.25626060366630554
          vf_loss: 1.1291259721749358
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,201,5022.45,201000,-27.936,-23.9,-45.7,279.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-21_21-09-27
  done: false
  episode_len_mean: 282.83
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -28.28300000000013
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 2
  episodes_total: 558
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 0.8878528383043077
          entropy_coeff: 0.009999999999999998
          kl: 0.007324885574762479
          policy_loss: 0.08646438949637943
          total_loss: 0.6876029305987887
          vf_explained_var: -0.6531229615211487
          vf_loss: 0.6100023988427388
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,202,5041.4,202000,-28.283,-23.9,-46.9,282.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-21_21-09-44
  done: false
  episode_len_mean: 287.58
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -28.758000000000138
  episode_reward_min: -52.500000000000476
  episodes_this_iter: 2
  episodes_total: 560
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 0.903844228055742
          entropy_coeff: 0.009999999999999998
          kl: 0.006018421828418359
          policy_loss: 0.09673535625139872
          total_loss: 0.7492094612783856
          vf_explained_var: -0.6019647717475891
          vf_loss: 0.6615004951548246
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,203,5058.68,203000,-28.758,-23.9,-52.5,287.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-21_21-10-02
  done: false
  episode_len_mean: 291.7
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.17000000000014
  episode_reward_min: -52.500000000000476
  episodes_this_iter: 2
  episodes_total: 562
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 0.9135278436872695
          entropy_coeff: 0.009999999999999998
          kl: 0.0037630906058468603
          policy_loss: 0.08283307419882881
          total_loss: 0.7550964186588923
          vf_explained_var: -0.6504724025726318
          vf_loss: 0.681391086946759
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,204,5077.09,204000,-29.17,-23.9,-52.5,291.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-21_21-10-19
  done: false
  episode_len_mean: 296.62
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.66200000000015
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 564
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001002259575761855
          cur_lr: 5.000000000000001e-05
          entropy: 0.9621102498637305
          entropy_coeff: 0.009999999999999998
          kl: 0.0109580974938068
          policy_loss: 0.09517829798989826
          total_loss: 0.7844323373503155
          vf_explained_var: -0.6630849838256836
          vf_loss: 0.6988641700914336
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,205,5093.95,205000,-29.662,-23.9,-54.4,296.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-21_21-10-37
  done: false
  episode_len_mean: 301.4
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -30.14000000000016
  episode_reward_min: -55.900000000000524
  episodes_this_iter: 2
  episodes_total: 566
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001002259575761855
          cur_lr: 5.000000000000001e-05
          entropy: 0.9865347981452942
          entropy_coeff: 0.009999999999999998
          kl: 0.0062097002494277
          policy_loss: 0.09269872572686937
          total_loss: 0.816923494802581
          vf_explained_var: -0.28112614154815674
          vf_loss: 0.7340838875207636
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,206,5111.34,206000,-30.14,-23.9,-55.9,301.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-21_21-10-52
  done: false
  episode_len_mean: 304.49
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -30.449000000000165
  episode_reward_min: -60.900000000000595
  episodes_this_iter: 1
  episodes_total: 567
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001002259575761855
          cur_lr: 5.000000000000001e-05
          entropy: 0.9262494934929741
          entropy_coeff: 0.009999999999999998
          kl: 0.005421110529013262
          policy_loss: -0.053231495122114815
          total_loss: 0.6701707836654451
          vf_explained_var: -0.3935410678386688
          vf_loss: 0.7326593436300755
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,207,5126.61,207000,-30.449,-23.9,-60.9,304.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-21_21-11-07
  done: false
  episode_len_mean: 310.31
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -31.031000000000166
  episode_reward_min: -60.900000000000595
  episodes_this_iter: 2
  episodes_total: 569
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001002259575761855
          cur_lr: 5.000000000000001e-05
          entropy: 0.8559459990925259
          entropy_coeff: 0.009999999999999998
          kl: 0.006069102094052642
          policy_loss: -0.07729183485110601
          total_loss: 1.349323680996895
          vf_explained_var: -0.2987045645713806
          vf_loss: 1.4351689020792644
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,208,5142.05,208000,-31.031,-23.9,-60.9,310.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-21_21-11-43
  done: false
  episode_len_mean: 314.7
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -31.470000000000176
  episode_reward_min: -60.900000000000595
  episodes_this_iter: 2
  episodes_total: 571
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001002259575761855
          cur_lr: 5.000000000000001e-05
          entropy: 0.8828593618339963
          entropy_coeff: 0.009999999999999998
          kl: 0.003585327618040118
          policy_loss: 0.028492622077465057
          total_loss: 0.8318837698962953
          vf_explained_var: -0.09714534878730774
          vf_loss: 0.8122161585423682
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,209,5178.2,209000,-31.47,-23.9,-60.9,314.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-21_21-11-59
  done: false
  episode_len_mean: 320.06
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -32.00600000000018
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 573
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005011297878809275
          cur_lr: 5.000000000000001e-05
          entropy: 0.8449890547328525
          entropy_coeff: 0.009999999999999998
          kl: 0.005854844935606677
          policy_loss: 0.0939280738433202
          total_loss: 0.8224221616983414
          vf_explained_var: -0.34204670786857605
          vf_loss: 0.7369410682676567
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,210,5193.93,210000,-32.006,-23.9,-62,320.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-21_21-12-14
  done: false
  episode_len_mean: 322.77
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -32.277000000000186
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 1
  episodes_total: 574
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005011297878809275
          cur_lr: 5.000000000000001e-05
          entropy: 0.8514835993448894
          entropy_coeff: 0.009999999999999998
          kl: 0.005858278320384929
          policy_loss: -0.05416114065382216
          total_loss: 0.7038252272539669
          vf_explained_var: -0.3956053555011749
          vf_loss: 0.7664982682507899
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,211,5208.2,211000,-32.277,-23.9,-62,322.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-21_21-12-31
  done: false
  episode_len_mean: 327.56
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -32.75600000000019
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 576
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005011297878809275
          cur_lr: 5.000000000000001e-05
          entropy: 0.8253431207603878
          entropy_coeff: 0.009999999999999998
          kl: 0.004326859797042242
          policy_loss: -0.07772946159044901
          total_loss: 1.3695332417885462
          vf_explained_var: -0.2780912518501282
          vf_loss: 1.4555139599574938
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,212,5225.72,212000,-32.756,-23.9,-62,327.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-21_21-12-49
  done: false
  episode_len_mean: 331.47
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.147000000000205
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 578
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00025056489394046375
          cur_lr: 5.000000000000001e-05
          entropy: 0.853010067012575
          entropy_coeff: 0.009999999999999998
          kl: 0.0032651739807612835
          policy_loss: -0.08178930895196067
          total_loss: 1.373125558429294
          vf_explained_var: -0.26445865631103516
          vf_loss: 1.46344413889779
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,213,5243.62,213000,-33.147,-23.9,-62,331.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-21_21-13-05
  done: false
  episode_len_mean: 337.46
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.74600000000021
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 580
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00012528244697023187
          cur_lr: 5.000000000000001e-05
          entropy: 0.8314125537872314
          entropy_coeff: 0.009999999999999998
          kl: 0.003861808413896522
          policy_loss: -0.0899796982606252
          total_loss: 1.3416604535447227
          vf_explained_var: -0.23257286846637726
          vf_loss: 1.4399537912673421
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,214,5259.56,214000,-33.746,-23.9,-62,337.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-21_21-13-21
  done: false
  episode_len_mean: 343.41
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -34.341000000000214
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 582
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.264122348511594e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7702325171894497
          entropy_coeff: 0.009999999999999998
          kl: 0.0030062026739221094
          policy_loss: 0.0886940793858634
          total_loss: 0.8102004465129641
          vf_explained_var: -0.43560853600502014
          vf_loss: 0.7292085136079953
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,215,5275.99,215000,-34.341,-23.9,-62,343.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-21_21-13-39
  done: false
  episode_len_mean: 348.53
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -34.85300000000022
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 584
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.132061174255797e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7825129502349429
          entropy_coeff: 0.009999999999999998
          kl: 0.005729943155477852
          policy_loss: 0.09819604059060415
          total_loss: 0.8382334334982766
          vf_explained_var: -0.4703831672668457
          vf_loss: 0.747862345394161
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,216,5293.58,216000,-34.853,-23.9,-62,348.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-21_21-13-55
  done: false
  episode_len_mean: 351.61
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -35.16100000000023
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 1
  episodes_total: 585
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.132061174255797e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7969564278920491
          entropy_coeff: 0.009999999999999998
          kl: 0.00507612271401091
          policy_loss: -0.05305790901184082
          total_loss: 0.6927556958463457
          vf_explained_var: -0.6375241875648499
          vf_loss: 0.7537830193630524
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,217,5309.9,217000,-35.161,-23.9,-62,351.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-21_21-14-12
  done: false
  episode_len_mean: 357.43
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -35.74300000000024
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 587
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.132061174255797e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7200436863634322
          entropy_coeff: 0.009999999999999998
          kl: 0.0054798357231670225
          policy_loss: -0.07438556171125836
          total_loss: 1.381520989868376
          vf_explained_var: -0.2229265421628952
          vf_loss: 1.4631068204012183
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,218,5326.48,218000,-35.743,-23.9,-62,357.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-21_21-14-27
  done: false
  episode_len_mean: 363.08
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -36.30800000000024
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 589
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.132061174255797e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7011487788624233
          entropy_coeff: 0.009999999999999998
          kl: 0.00412653275874357
          policy_loss: -0.0833927356534534
          total_loss: 1.3211610423194038
          vf_explained_var: -0.09862594306468964
          vf_loss: 1.411565159426795
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,219,5341.93,219000,-36.308,-23.9,-62,363.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-21_21-14-43
  done: false
  episode_len_mean: 368.49
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -36.84900000000025
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 591
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5660305871278984e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7194507704840766
          entropy_coeff: 0.009999999999999998
          kl: 0.004444316611873327
          policy_loss: 0.09887288196219338
          total_loss: 0.826472266846233
          vf_explained_var: -0.19283868372440338
          vf_loss: 0.7347938367269106
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,220,5357.79,220000,-36.849,-23.9,-62,368.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-21_21-15-00
  done: false
  episode_len_mean: 373.02
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -37.302000000000255
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.830152935639492e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7017159654034508
          entropy_coeff: 0.009999999999999998
          kl: 0.0006269003480463948
          policy_loss: 0.11198154290517172
          total_loss: 0.8204230397939682
          vf_explained_var: -0.5917019844055176
          vf_loss: 0.7154586664504475
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,221,5374.68,221000,-37.302,-23.9,-62,373.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-21_21-15-18
  done: false
  episode_len_mean: 375.66
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -37.566000000000265
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 1
  episodes_total: 594
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.915076467819746e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7496213687790765
          entropy_coeff: 0.009999999999999998
          kl: 0.005207322519744473
          policy_loss: -0.05841440541876687
          total_loss: 0.7071109203828706
          vf_explained_var: -0.24851898849010468
          vf_loss: 0.7730215289526515
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,222,5392.34,222000,-37.566,-23.9,-62,375.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-21_21-15-35
  done: false
  episode_len_mean: 381.0
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -38.10000000000027
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 596
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.915076467819746e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7274579862753551
          entropy_coeff: 0.009999999999999998
          kl: 0.004019592936877221
          policy_loss: -0.06343685438235601
          total_loss: 1.114662730693817
          vf_explained_var: 0.047816261649131775
          vf_loss: 1.1853741460376315
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,223,5408.99,223000,-38.1,-23.9,-62,381


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-21_21-15-52
  done: false
  episode_len_mean: 385.47
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -38.547000000000274
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 598
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.957538233909873e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7367209421263801
          entropy_coeff: 0.009999999999999998
          kl: 0.004500312085960835
          policy_loss: -0.07773037023014492
          total_loss: 1.3337125887473424
          vf_explained_var: 0.00909516029059887
          vf_loss: 1.418810148206022
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,224,5426.7,224000,-38.547,-23.9,-62,385.47




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-21_21-16-31
  done: false
  episode_len_mean: 388.88
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -38.88800000000028
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 601
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.787691169549365e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6811549021138086
          entropy_coeff: 0.009999999999999998
          kl: 0.006391719429280782
          policy_loss: -0.08824000656604766
          total_loss: 1.8191167857911852
          vf_explained_var: 0.08359728753566742
          vf_loss: 1.9141683459281922
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,225,5465.03,225000,-38.888,-23.9,-62,388.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-21_21-16-55
  done: false
  episode_len_mean: 390.54
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -39.054000000000286
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 604
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.787691169549365e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.638931706878874
          entropy_coeff: 0.009999999999999998
          kl: 0.0068801925210177865
          policy_loss: -0.029571980983018876
          total_loss: 1.505220694674386
          vf_explained_var: 0.03627719730138779
          vf_loss: 1.5411819881863065
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,226,5489.5,226000,-39.054,-23.9,-62,390.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-21_21-17-17
  done: false
  episode_len_mean: 392.83
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -39.283000000000285
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 607
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.787691169549365e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6415247334374322
          entropy_coeff: 0.009999999999999998
          kl: 0.029742885174589676
          policy_loss: 0.022250394109222624
          total_loss: 1.3527632749742933
          vf_explained_var: -0.2830550968647003
          vf_loss: 1.3369281188481383
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,227,5511.24,227000,-39.283,-23.9,-62,392.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-21_21-17-40
  done: false
  episode_len_mean: 395.42
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -39.54200000000029
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 610
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468153675432404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6957569983270433
          entropy_coeff: 0.009999999999999998
          kl: 0.016336072934294205
          policy_loss: 0.049964021808571284
          total_loss: 1.3248942755990558
          vf_explained_var: -0.06954236328601837
          vf_loss: 1.2818878144025803
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,228,5533.97,228000,-39.542,-23.9,-62,395.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-21_21-18-03
  done: false
  episode_len_mean: 397.72
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -39.7720000000003
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 613
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468153675432404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7609400967756907
          entropy_coeff: 0.009999999999999998
          kl: 0.005812435075755228
          policy_loss: 0.06515552683009042
          total_loss: 1.3330050488313039
          vf_explained_var: 0.0591459795832634
          vf_loss: 1.2754589120546977
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,229,5556.92,229000,-39.772,-23.9,-62,397.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-21_21-18-25
  done: false
  episode_len_mean: 400.16
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -40.016000000000304
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 615
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468153675432404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8066432601875729
          entropy_coeff: 0.009999999999999998
          kl: 0.011519495793300413
          policy_loss: -0.07982319576872719
          total_loss: 1.2471493893199497
          vf_explained_var: -0.05700457841157913
          vf_loss: 1.3350390020343992
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,230,5579.36,230000,-40.016,-23.9,-62,400.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-21_21-18-45
  done: false
  episode_len_mean: 405.55
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -40.55500000000031
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 618
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468153675432404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8797724445660909
          entropy_coeff: 0.009999999999999998
          kl: 0.012977319641410487
          policy_loss: 0.0338224347266886
          total_loss: 1.3063410037093692
          vf_explained_var: 0.047892775386571884
          vf_loss: 1.281316285994318
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,231,5599.58,231000,-40.555,-23.9,-62,405.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-21_21-19-07
  done: false
  episode_len_mean: 408.36
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -40.83600000000031
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 620
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468153675432404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7992507523960537
          entropy_coeff: 0.009999999999999998
          kl: 0.020490950046429448
          policy_loss: -0.08430779692199496
          total_loss: 1.2060301469431982
          vf_explained_var: 0.09141765534877777
          vf_loss: 1.298330432921648
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,232,5621.49,232000,-40.836,-24,-62,408.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-21_21-19-29
  done: false
  episode_len_mean: 413.22
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -41.322000000000315
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 3
  episodes_total: 623
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2022305131486063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7708859860897064
          entropy_coeff: 0.009999999999999998
          kl: 0.024380944740641376
          policy_loss: 0.05948004325230916
          total_loss: 1.329061257839203
          vf_explained_var: 0.10969509184360504
          vf_loss: 1.277290031645033
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,233,5642.96,233000,-41.322,-24.2,-62,413.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-21_21-19-48
  done: false
  episode_len_mean: 417.9
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -41.79000000000032
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 625
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.303345769722911e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8548923042085436
          entropy_coeff: 0.009999999999999998
          kl: 0.03768417742855412
          policy_loss: 0.10637832035621007
          total_loss: 0.851555543144544
          vf_explained_var: 0.19210490584373474
          vf_loss: 0.7537260088655684
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,234,5662.1,234000,-41.79,-24.2,-62,417.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-21_21-20-05
  done: false
  episode_len_mean: 423.51
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -42.35100000000034
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 627
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8328337954150306
          entropy_coeff: 0.009999999999999998
          kl: 0.011614717152304986
          policy_loss: 0.11425507995817397
          total_loss: 0.6803617568479644
          vf_explained_var: -0.17472434043884277
          vf_loss: 0.5744349727200138
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,235,5678.85,235000,-42.351,-24.2,-62,423.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-21_21-20-23
  done: false
  episode_len_mean: 426.23
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -42.62300000000034
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 1
  episodes_total: 628
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8463229086663988
          entropy_coeff: 0.009999999999999998
          kl: 0.007630196566636964
          policy_loss: -0.04360192086961534
          total_loss: 0.6918882608413697
          vf_explained_var: -0.261555939912796
          vf_loss: 0.7439533861974875
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,236,5696.78,236000,-42.623,-24.2,-62,426.23




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-21_21-20-55
  done: false
  episode_len_mean: 431.74
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -43.17400000000035
  episode_reward_min: -62.00000000000061
  episodes_this_iter: 2
  episodes_total: 630
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8780351493093702
          entropy_coeff: 0.009999999999999998
          kl: 0.00837385130970792
          policy_loss: -0.05738272352351083
          total_loss: 0.7933981751402219
          vf_explained_var: -0.07117484509944916
          vf_loss: 0.8595612104154295
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,237,5729.38,237000,-43.174,-24.4,-62,431.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-21_21-21-12
  done: false
  episode_len_mean: 438.83
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -43.88300000000035
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 632
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8608194549878438
          entropy_coeff: 0.009999999999999998
          kl: 0.006702023064577128
          policy_loss: -0.08383636640177833
          total_loss: 1.3414254440201654
          vf_explained_var: -0.21684469282627106
          vf_loss: 1.4338699666990173
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,238,5745.99,238000,-43.883,-24.4,-63.6,438.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-21_21-21-28
  done: false
  episode_len_mean: 445.36
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -44.53600000000037
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 634
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8583303398556179
          entropy_coeff: 0.009999999999999998
          kl: 0.006356473877432971
          policy_loss: 0.09488223178519142
          total_loss: 0.8033987558550305
          vf_explained_var: -0.40830501914024353
          vf_loss: 0.717099795728508
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,239,5761.54,239000,-44.536,-24.4,-63.6,445.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-21_21-21-43
  done: false
  episode_len_mean: 448.77
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -44.87700000000037
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 1
  episodes_total: 635
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.955018654584363e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8349413990974426
          entropy_coeff: 0.009999999999999998
          kl: 0.020980453870228486
          policy_loss: -0.05414378394683202
          total_loss: 0.6965737625956535
          vf_explained_var: -0.6063706278800964
          vf_loss: 0.7590668592808976
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,240,5777.01,240000,-44.877,-24.4,-63.6,448.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-21_21-22-01
  done: false
  episode_len_mean: 453.77
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -45.37700000000037
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 637
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.432527981876546e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8026387009355757
          entropy_coeff: 0.009999999999999998
          kl: 0.0069659588161515035
          policy_loss: -0.08471679124567244
          total_loss: 1.3435974230368932
          vf_explained_var: -0.02075696736574173
          vf_loss: 1.4363405511611038
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,241,5795.04,241000,-45.377,-24.4,-63.6,453.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-21_21-22-15
  done: false
  episode_len_mean: 459.24
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -45.924000000000376
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 639
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.432527981876546e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.785987173848682
          entropy_coeff: 0.009999999999999998
          kl: 0.00237052556076761
          policy_loss: -0.075823465651936
          total_loss: 1.3301526453759935
          vf_explained_var: -0.2649857699871063
          vf_loss: 1.413835960212681
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,242,5809.26,242000,-45.924,-24.4,-63.6,459.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-21_21-22-29
  done: false
  episode_len_mean: 466.76
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -46.676000000000386
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 641
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.716263990938273e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8521682500839234
          entropy_coeff: 0.009999999999999998
          kl: 0.008818534876057112
          policy_loss: 0.09960328771008385
          total_loss: 0.8158472372425927
          vf_explained_var: -0.5896406173706055
          vf_loss: 0.7247655780778991
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,243,5823.16,243000,-46.676,-24.8,-63.6,466.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-21_21-22-46
  done: false
  episode_len_mean: 472.38
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -47.23800000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 643
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.716263990938273e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8467639101876153
          entropy_coeff: 0.009999999999999998
          kl: 0.0038270418469277304
          policy_loss: 0.10683913512362374
          total_loss: 0.8496376868751314
          vf_explained_var: -0.6536217331886292
          vf_loss: 0.7512661625734633
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,244,5840.04,244000,-47.238,-24.8,-63.6,472.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-21_21-23-01
  done: false
  episode_len_mean: 475.91
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -47.591000000000406
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 1
  episodes_total: 644
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8822942442364163
          entropy_coeff: 0.009999999999999998
          kl: 0.011317852399061204
          policy_loss: -0.054165622426403895
          total_loss: 0.7024423418773545
          vf_explained_var: -0.6557899713516235
          vf_loss: 0.7654308917621772
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,245,5854.76,245000,-47.591,-24.8,-63.6,475.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-21_21-23-18
  done: false
  episode_len_mean: 482.2
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -48.22000000000041
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 646
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8780993951691521
          entropy_coeff: 0.009999999999999998
          kl: 0.017470649244071032
          policy_loss: -0.0867019494374593
          total_loss: 1.3675987614525689
          vf_explained_var: -0.31953880190849304
          vf_loss: 1.4630816876888275
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,246,5871.5,246000,-48.22,-24.8,-63.6,482.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-21_21-23-37
  done: false
  episode_len_mean: 486.14
  episode_media: {}
  episode_reward_max: -25.200000000000088
  episode_reward_mean: -48.61400000000042
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 648
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8285002178615994
          entropy_coeff: 0.009999999999999998
          kl: 0.0053388267031417615
          policy_loss: -0.07563206735584471
          total_loss: 1.355294057395723
          vf_explained_var: -0.3055429458618164
          vf_loss: 1.4392111391656928
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,247,5891.08,247000,-48.614,-25.2,-63.6,486.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-21_21-23-58
  done: false
  episode_len_mean: 490.46
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -49.04600000000043
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 651
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8156237661838531
          entropy_coeff: 0.009999999999999998
          kl: 0.007537034556442674
          policy_loss: 0.04509368273946974
          total_loss: 1.3991135077344046
          vf_explained_var: -0.21391311287879944
          vf_loss: 1.3621760693689187
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,248,5911.85,248000,-49.046,-32.9,-63.6,490.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-21_21-24-19
  done: false
  episode_len_mean: 491.06
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -49.10600000000043
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 653
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7946351130803426
          entropy_coeff: 0.009999999999999998
          kl: 0.010674678135017856
          policy_loss: -0.0870734559165107
          total_loss: 1.2655279924472174
          vf_explained_var: -0.26736509799957275
          vf_loss: 1.3605477526783942
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,249,5932.3,249000,-49.106,-32.9,-63.6,491.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-21_21-24-37
  done: false
  episode_len_mean: 492.03
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -49.20300000000043
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 655
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8581319954691366e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8574076526694827
          entropy_coeff: 0.009999999999999998
          kl: 0.02860919610686897
          policy_loss: -0.06460936797989739
          total_loss: 1.3395777808295355
          vf_explained_var: -0.06408393383026123
          vf_loss: 1.4127611767086719
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,250,5950.68,250000,-49.203,-32.9,-63.6,492.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-21_21-24-59
  done: false
  episode_len_mean: 490.69
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -49.069000000000436
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 658
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8420250203874377
          entropy_coeff: 0.009999999999999998
          kl: 0.009053007806197083
          policy_loss: 0.042645543813705444
          total_loss: 1.3903677973482345
          vf_explained_var: 0.009040843695402145
          vf_loss: 1.3561424877908494
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,251,5972.51,251000,-49.069,-32.9,-63.6,490.69




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-21_21-25-36
  done: false
  episode_len_mean: 488.4
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -48.84000000000043
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 660
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7964633948273129
          entropy_coeff: 0.009999999999999998
          kl: 0.010995041144692507
          policy_loss: -0.08945171568128797
          total_loss: 1.2508762203984791
          vf_explained_var: -0.025819402188062668
          vf_loss: 1.3482925386064581
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,252,6010.12,252000,-48.84,-32.9,-63.6,488.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-21_21-25-58
  done: false
  episode_len_mean: 485.85
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -48.585000000000406
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 663
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7607304467095269
          entropy_coeff: 0.009999999999999998
          kl: 0.008210702239270281
          policy_loss: 0.046715622312492794
          total_loss: 1.3704269246922598
          vf_explained_var: -0.09825977683067322
          vf_loss: 1.3313185597459476
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,253,6031.78,253000,-48.585,-32.9,-63.6,485.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-21_21-26-17
  done: false
  episode_len_mean: 483.99
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -48.39900000000041
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 665
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8269774066077338
          entropy_coeff: 0.009999999999999998
          kl: 0.012549026159199568
          policy_loss: 0.11098998172415628
          total_loss: 0.7636216147078408
          vf_explained_var: -0.647041380405426
          vf_loss: 0.6609013862493965
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,254,6051.12,254000,-48.399,-32.9,-63.6,483.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-21_21-26-39
  done: false
  episode_len_mean: 481.55
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -48.15500000000042
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 667
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9168813235229916
          entropy_coeff: 0.009999999999999998
          kl: 0.007906881571858785
          policy_loss: -0.08440053595436944
          total_loss: 1.3091719855864843
          vf_explained_var: -0.3105792999267578
          vf_loss: 1.402741316292021
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,255,6072.14,255000,-48.155,-32.9,-63.6,481.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-21_21-27-00
  done: false
  episode_len_mean: 476.72
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -47.67200000000041
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8607888473404779
          entropy_coeff: 0.009999999999999998
          kl: 0.007729944986102232
          policy_loss: 0.043954575806856154
          total_loss: 1.4206776996453603
          vf_explained_var: -0.2980383634567261
          vf_loss: 1.3853310026228427
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,256,6093.15,256000,-47.672,-32.9,-63.6,476.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-21_21-27-18
  done: false
  episode_len_mean: 475.43
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -47.543000000000404
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 672
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9066850231753455
          entropy_coeff: 0.009999999999999998
          kl: 0.0139761357480255
          policy_loss: 0.091874750620789
          total_loss: 0.8089049620760812
          vf_explained_var: -0.648349940776825
          vf_loss: 0.7260970231559541
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,257,6111.65,257000,-47.543,-32.9,-63.6,475.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-21_21-27-38
  done: false
  episode_len_mean: 471.77
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -47.1770000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 674
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8289985020955404
          entropy_coeff: 0.009999999999999998
          kl: 0.013436030563230162
          policy_loss: -0.07941014187203513
          total_loss: 1.275999626186159
          vf_explained_var: -0.28244486451148987
          vf_loss: 1.3636996945573223
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,258,6132.03,258000,-47.177,-32.9,-63.6,471.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-21_21-27-58
  done: false
  episode_len_mean: 468.8
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.88000000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 677
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.798566777838601
          entropy_coeff: 0.009999999999999998
          kl: 0.010863222564321005
          policy_loss: 0.03526545266310374
          total_loss: 1.3551497220993043
          vf_explained_var: 0.06532992422580719
          vf_loss: 1.327869909339481
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,259,6151.95,259000,-46.88,-32.9,-63.6,468.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-21_21-28-18
  done: false
  episode_len_mean: 467.34
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.7340000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 679
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.835277282529407
          entropy_coeff: 0.009999999999999998
          kl: 0.00911126107170443
          policy_loss: 0.0896714770131641
          total_loss: 0.8272130328747961
          vf_explained_var: 0.04743553325533867
          vf_loss: 0.7458943174738023
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,260,6171.41,260000,-46.734,-32.9,-63.6,467.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-21_21-28-36
  done: false
  episode_len_mean: 465.94
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.5940000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 681
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7894550687736935
          entropy_coeff: 0.009999999999999998
          kl: 0.009700182851975742
          policy_loss: 0.08828787505626678
          total_loss: 0.8271807077858183
          vf_explained_var: -0.6155158281326294
          vf_loss: 0.7467873582616449
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,261,6189.01,261000,-46.594,-32.9,-63.6,465.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-21_21-28-52
  done: false
  episode_len_mean: 465.43
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.54300000000038
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 683
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7820225344763861
          entropy_coeff: 0.009999999999999998
          kl: 0.006416469881759923
          policy_loss: 0.09499953091144561
          total_loss: 0.8598986052804523
          vf_explained_var: -0.6062456369400024
          vf_loss: 0.772719292032222
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,262,6205.82,262000,-46.543,-32.9,-63.6,465.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-21_21-29-08
  done: false
  episode_len_mean: 465.59
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.55900000000038
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 1
  episodes_total: 684
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.791465648677614
          entropy_coeff: 0.009999999999999998
          kl: 0.00733377282923742
          policy_loss: -0.0527511371506585
          total_loss: 0.707469581398699
          vf_explained_var: -0.6278718113899231
          vf_loss: 0.7681353436369035
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,263,6221.63,263000,-46.559,-32.9,-63.6,465.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-21_21-29-24
  done: false
  episode_len_mean: 465.29
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.529000000000394
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 686
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.769410232702891
          entropy_coeff: 0.009999999999999998
          kl: 0.008673192947785917
          policy_loss: -0.056690292474296355
          total_loss: 1.22133588956462
          vf_explained_var: -0.31233036518096924
          vf_loss: 1.2857202742248774
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,264,6237.6,264000,-46.529,-32.9,-63.6,465.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-21_21-29-42
  done: false
  episode_len_mean: 465.3
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.53000000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 688
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7871431569258373
          entropy_coeff: 0.009999999999999998
          kl: 0.00507721631973964
          policy_loss: -0.08433068626456791
          total_loss: 1.3636180616087383
          vf_explained_var: -0.29103901982307434
          vf_loss: 1.4558201656573349
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,265,6254.89,265000,-46.53,-32.9,-63.6,465.3




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-21_21-30-16
  done: false
  episode_len_mean: 463.68
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.36800000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 690
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7983274062474569
          entropy_coeff: 0.009999999999999998
          kl: 0.0065018122285006065
          policy_loss: -0.08108512494299147
          total_loss: 1.3140002727508544
          vf_explained_var: -0.29809945821762085
          vf_loss: 1.4030686197181543
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,266,6289.03,266000,-46.368,-32.9,-63.6,463.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-21_21-30-32
  done: false
  episode_len_mean: 463.68
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.36800000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 692
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7871979932037063e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8120746546321445
          entropy_coeff: 0.009999999999999998
          kl: 0.0042970819945244544
          policy_loss: 0.06437429206238852
          total_loss: 0.827733822994762
          vf_explained_var: -0.3396519422531128
          vf_loss: 0.7714802594234546
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,267,6305.27,267000,-46.368,-32.9,-63.6,463.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-21_21-30-48
  done: false
  episode_len_mean: 464.17
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.4170000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 694
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8533875160747104
          entropy_coeff: 0.009999999999999998
          kl: 0.008605845716005709
          policy_loss: 0.09842500189940134
          total_loss: 0.8820315238502291
          vf_explained_var: -0.6606636047363281
          vf_loss: 0.7921403998819491
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,268,6321.07,268000,-46.417,-32.9,-63.6,464.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-21_21-31-05
  done: false
  episode_len_mean: 463.47
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.3470000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 696
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9034995873769124
          entropy_coeff: 0.009999999999999998
          kl: 0.00984081941999786
          policy_loss: 0.1191604802178012
          total_loss: 0.7784635949465963
          vf_explained_var: -0.4628842771053314
          vf_loss: 0.6683381142612133
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,269,6338.3,269000,-46.347,-32.9,-63.6,463.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-21_21-31-25
  done: false
  episode_len_mean: 462.3
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.23000000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 698
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.863586214515898
          entropy_coeff: 0.009999999999999998
          kl: 0.013430337471243645
          policy_loss: -0.08870503306388855
          total_loss: 1.421735735734304
          vf_explained_var: -0.22771163284778595
          vf_loss: 1.5190766375925806
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,270,6357.79,270000,-46.23,-32.9,-63.6,462.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-21_21-31-42
  done: false
  episode_len_mean: 463.41
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.34100000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 700
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8073749873373244
          entropy_coeff: 0.009999999999999998
          kl: 0.009228869057338409
          policy_loss: -0.08164733383390639
          total_loss: 1.3935953501198026
          vf_explained_var: -0.30423402786254883
          vf_loss: 1.4833164339264233
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,271,6375.6,271000,-46.341,-32.9,-63.6,463.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-21_21-32-01
  done: false
  episode_len_mean: 465.73
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.57300000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 702
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7809336655669742
          entropy_coeff: 0.009999999999999998
          kl: 0.006284344025944089
          policy_loss: -0.07897889730003145
          total_loss: 1.2924740460183886
          vf_explained_var: -0.07901634275913239
          vf_loss: 1.3792622769044505
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,272,6393.86,272000,-46.573,-32.9,-63.6,465.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-21_21-32-18
  done: false
  episode_len_mean: 468.14
  episode_media: {}
  episode_reward_max: -32.9000000000002
  episode_reward_mean: -46.8140000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 704
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7411084155241648
          entropy_coeff: 0.009999999999999998
          kl: 0.006803978488557445
          policy_loss: -0.08458067460192574
          total_loss: 1.3266803125540416
          vf_explained_var: -0.2977108955383301
          vf_loss: 1.4186720855534076
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,273,6410.72,273000,-46.814,-32.9,-63.6,468.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-21_21-32-36
  done: false
  episode_len_mean: 470.88
  episode_media: {}
  episode_reward_max: -33.60000000000021
  episode_reward_mean: -47.08800000000039
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 706
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6811087965965271
          entropy_coeff: 0.009999999999999998
          kl: 0.008713695911247715
          policy_loss: -0.0903559245997005
          total_loss: 1.3055498169528112
          vf_explained_var: 0.052449699491262436
          vf_loss: 1.4027168202731344
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,274,6429.51,274000,-47.088,-33.6,-63.6,470.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-21_21-32-54
  done: false
  episode_len_mean: 473.77
  episode_media: {}
  episode_reward_max: -35.10000000000023
  episode_reward_mean: -47.377000000000386
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 708
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3935989966018532e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6356075902779897
          entropy_coeff: 0.009999999999999998
          kl: 0.0044187028514612574
          policy_loss: -0.06920618928141065
          total_loss: 1.2910995132393308
          vf_explained_var: 0.001115994295105338
          vf_loss: 1.3666618060734537
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,275,6447.03,275000,-47.377,-35.1,-63.6,473.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-21_21-33-11
  done: false
  episode_len_mean: 476.59
  episode_media: {}
  episode_reward_max: -35.10000000000023
  episode_reward_mean: -47.659000000000404
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 710
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6348250084453159
          entropy_coeff: 0.009999999999999998
          kl: 0.008166060938243626
          policy_loss: -0.08145742995871438
          total_loss: 1.2513315939240985
          vf_explained_var: -0.0762266293168068
          vf_loss: 1.339137260367473
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,276,6464.52,276000,-47.659,-35.1,-63.6,476.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-21_21-33-28
  done: false
  episode_len_mean: 480.16
  episode_media: {}
  episode_reward_max: -35.10000000000023
  episode_reward_mean: -48.01600000000041
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 712
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6154526876078712
          entropy_coeff: 0.009999999999999998
          kl: 0.0060302986633793704
          policy_loss: -0.06979676485061645
          total_loss: 1.2437650710344315
          vf_explained_var: -0.25742143392562866
          vf_loss: 1.3197163520587816
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,277,6481.07,277000,-48.016,-35.1,-63.6,480.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-21_21-33-45
  done: false
  episode_len_mean: 483.15
  episode_media: {}
  episode_reward_max: -35.500000000000234
  episode_reward_mean: -48.31500000000042
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 714
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6868778977129194
          entropy_coeff: 0.009999999999999998
          kl: 0.007833146838359875
          policy_loss: -0.08983481923739116
          total_loss: 1.169201691614257
          vf_explained_var: -0.229225754737854
          vf_loss: 1.2659052948156992
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,278,6498.48,278000,-48.315,-35.5,-63.6,483.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-21_21-34-06
  done: false
  episode_len_mean: 484.17
  episode_media: {}
  episode_reward_max: -37.10000000000026
  episode_reward_mean: -48.417000000000414
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 717
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.708159065246582
          entropy_coeff: 0.009999999999999998
          kl: 0.007882387977418049
          policy_loss: 0.03524950138396687
          total_loss: 1.29970220790969
          vf_explained_var: -0.130755215883255
          vf_loss: 1.2715342934760783
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,279,6518.8,279000,-48.417,-37.1,-63.6,484.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-21_21-34-24
  done: false
  episode_len_mean: 485.57
  episode_media: {}
  episode_reward_max: -37.10000000000026
  episode_reward_mean: -48.55700000000043
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 719
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7262449714872572
          entropy_coeff: 0.009999999999999998
          kl: 0.01962439341101792
          policy_loss: 0.09592285934421751
          total_loss: 0.7468510405884848
          vf_explained_var: -0.10945656895637512
          vf_loss: 0.6581906158787508
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,280,6536.88,280000,-48.557,-37.1,-63.6,485.57




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-21_21-35-03
  done: false
  episode_len_mean: 485.01
  episode_media: {}
  episode_reward_max: -37.10000000000026
  episode_reward_mean: -48.501000000000424
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 721
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7095840646160974
          entropy_coeff: 0.009999999999999998
          kl: 0.008741045570181032
          policy_loss: -0.09626961681577895
          total_loss: 1.1595053136348725
          vf_explained_var: -0.04982084035873413
          vf_loss: 1.2628707830276755
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,281,6576.3,281000,-48.501,-37.1,-63.6,485.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-21_21-35-25
  done: false
  episode_len_mean: 483.87
  episode_media: {}
  episode_reward_max: -35.40000000000023
  episode_reward_mean: -48.38700000000042
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 724
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7339332905080583
          entropy_coeff: 0.009999999999999998
          kl: 0.008457525195932744
          policy_loss: 0.033584180722634
          total_loss: 1.3077088508341048
          vf_explained_var: 0.14521242678165436
          vf_loss: 1.2814639968176682
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,282,6598.08,282000,-48.387,-35.4,-63.6,483.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-21_21-35-47
  done: false
  episode_len_mean: 480.12
  episode_media: {}
  episode_reward_max: -35.40000000000023
  episode_reward_mean: -48.01200000000042
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 727
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7824516389105055
          entropy_coeff: 0.009999999999999998
          kl: 0.011278443098594772
          policy_loss: 0.07409950097401936
          total_loss: 1.154342704017957
          vf_explained_var: -0.18816359341144562
          vf_loss: 1.0880677284465896
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,283,6620.03,283000,-48.012,-35.4,-63.6,480.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-21_21-36-10
  done: false
  episode_len_mean: 477.22
  episode_media: {}
  episode_reward_max: -35.40000000000023
  episode_reward_mean: -47.7220000000004
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 2
  episodes_total: 729
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8480761170387268
          entropy_coeff: 0.009999999999999998
          kl: 0.010920599863287785
          policy_loss: -0.0852673288848665
          total_loss: 1.1583147317171096
          vf_explained_var: -0.2829088568687439
          vf_loss: 1.2520628170834647
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,284,6642.56,284000,-47.722,-35.4,-63.6,477.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-21_21-36-33
  done: false
  episode_len_mean: 471.47
  episode_media: {}
  episode_reward_max: -35.40000000000023
  episode_reward_mean: -47.147000000000396
  episode_reward_min: -63.40000000000063
  episodes_this_iter: 3
  episodes_total: 732
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8496553268697526
          entropy_coeff: 0.009999999999999998
          kl: 0.009899555341172374
          policy_loss: 0.03832941568560071
          total_loss: 1.3024091217252942
          vf_explained_var: -0.28607720136642456
          vf_loss: 1.2725762635469438
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,285,6666.13,285000,-47.147,-35.4,-63.4,471.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-21_21-36-58
  done: false
  episode_len_mean: 464.08
  episode_media: {}
  episode_reward_max: -33.90000000000021
  episode_reward_mean: -46.40800000000039
  episode_reward_min: -63.40000000000063
  episodes_this_iter: 3
  episodes_total: 735
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8173039376735687
          entropy_coeff: 0.009999999999999998
          kl: 0.010083889769074952
          policy_loss: 0.04919014366136657
          total_loss: 1.2999684393405915
          vf_explained_var: -0.01278500072658062
          vf_loss: 1.2589513455828032
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,286,6690.75,286000,-46.408,-33.9,-63.4,464.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-21_21-37-21
  done: false
  episode_len_mean: 458.87
  episode_media: {}
  episode_reward_max: -33.90000000000021
  episode_reward_mean: -45.88700000000037
  episode_reward_min: -63.40000000000063
  episodes_this_iter: 3
  episodes_total: 738
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7953152901596493
          entropy_coeff: 0.009999999999999998
          kl: 0.012769587280657645
          policy_loss: 0.09303984509574043
          total_loss: 0.8349309371577369
          vf_explained_var: -0.09727979451417923
          vf_loss: 0.7498442452400923
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,287,6713.67,287000,-45.887,-33.9,-63.4,458.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-21_21-37-46
  done: false
  episode_len_mean: 450.06
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -45.00600000000037
  episode_reward_min: -60.900000000000595
  episodes_this_iter: 3
  episodes_total: 741
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.967994983009266e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7563374638557434
          entropy_coeff: 0.009999999999999998
          kl: 0.0040248616764854556
          policy_loss: 0.048142327782180576
          total_loss: 1.3546321590741475
          vf_explained_var: 0.029572535306215286
          vf_loss: 1.3140532033310997
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,288,6738.76,288000,-45.006,-30.1,-60.9,450.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-21_21-38-11
  done: false
  episode_len_mean: 442.63
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -44.26300000000035
  episode_reward_min: -59.70000000000058
  episodes_this_iter: 3
  episodes_total: 744
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.483997491504633e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8633092873626285
          entropy_coeff: 0.009999999999999998
          kl: 0.0169380324853233
          policy_loss: 0.05421933134396871
          total_loss: 1.2636768427160052
          vf_explained_var: 0.057856034487485886
          vf_loss: 1.2180905861987008
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,289,6764.1,289000,-44.263,-30.1,-59.7,442.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-21_21-38-35
  done: false
  episode_len_mean: 437.32
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -43.732000000000355
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 747
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.483997491504633e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.252007704310947
          entropy_coeff: 0.009999999999999998
          kl: 0.01883992031675564
          policy_loss: 0.08714889105823305
          total_loss: 1.0905304819345474
          vf_explained_var: -0.31163379549980164
          vf_loss: 1.0159016538411378
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,290,6787.35,290000,-43.732,-30.1,-58.5,437.32




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-21_21-39-14
  done: false
  episode_len_mean: 434.77
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -43.47700000000036
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 750
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.483997491504633e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2771905978520712
          entropy_coeff: 0.009999999999999998
          kl: 0.009000314466352282
          policy_loss: 0.04029302067226834
          total_loss: 1.3702557057142257
          vf_explained_var: -0.022433388978242874
          vf_loss: 1.3427345923251577
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,291,6827.18,291000,-43.477,-30.1,-58.5,434.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-21_21-39-41
  done: false
  episode_len_mean: 432.54
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -43.25400000000035
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 753
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.483997491504633e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2911231213145786
          entropy_coeff: 0.009999999999999998
          kl: 0.033438804249542896
          policy_loss: 0.03129623937937948
          total_loss: 1.3339035054047903
          vf_explained_var: -0.28013482689857483
          vf_loss: 1.3155184864997864
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,292,6853.8,292000,-43.254,-30.1,-58.5,432.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-21_21-40-02
  done: false
  episode_len_mean: 431.5
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -43.15000000000034
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 2
  episodes_total: 755
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.225996237256945e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2650511887338427
          entropy_coeff: 0.009999999999999998
          kl: 0.021312811422982827
          policy_loss: 0.07030882587035497
          total_loss: 0.2275333207514551
          vf_explained_var: -0.15102991461753845
          vf_loss: 0.16987499684716265
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,293,6874.97,293000,-43.15,-30.1,-58.5,431.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-21_21-40-26
  done: false
  episode_len_mean: 429.92
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.992000000000345
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 758
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.83899435588542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2765229158931308
          entropy_coeff: 0.009999999999999998
          kl: 0.0101381938837074
          policy_loss: 0.04057262274954054
          total_loss: 1.4383304688665601
          vf_explained_var: 0.02666982263326645
          vf_loss: 1.4105230775144366
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,294,6898.22,294000,-42.992,-30.1,-58.5,429.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-21_21-40-49
  done: false
  episode_len_mean: 428.48
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.84800000000034
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 761
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.83899435588542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2579835361904568
          entropy_coeff: 0.009999999999999998
          kl: 0.013907633129771233
          policy_loss: 0.06952422310908636
          total_loss: 1.1516988181405596
          vf_explained_var: -0.29020577669143677
          vf_loss: 1.0947544202622441
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,295,6921.66,295000,-42.848,-30.1,-58.5,428.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-21_21-41-14
  done: false
  episode_len_mean: 425.76
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.576000000000334
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 764
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.83899435588542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3180890189276802
          entropy_coeff: 0.009999999999999998
          kl: 0.02971045828023202
          policy_loss: 0.03851831903060277
          total_loss: 1.2536287079254786
          vf_explained_var: -0.14861226081848145
          vf_loss: 1.2282912504341867
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,296,6946.32,296000,-42.576,-30.1,-58.5,425.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-21_21-41-38
  done: false
  episode_len_mean: 423.55
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.35500000000034
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 2
  episodes_total: 766
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1758491533828133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.431678024927775
          entropy_coeff: 0.009999999999999998
          kl: 0.03043982199013508
          policy_loss: -0.08533534655968349
          total_loss: 1.2596523582935333
          vf_explained_var: 0.2544533908367157
          vf_loss: 1.3593044543431865
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,297,6970.89,297000,-42.355,-30.1,-58.5,423.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-21_21-42-01
  done: false
  episode_len_mean: 422.22
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.222000000000335
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 769
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3094226095411512
          entropy_coeff: 0.009999999999999998
          kl: 0.011014942884300844
          policy_loss: -0.046802711155679494
          total_loss: 1.4023923655351003
          vf_explained_var: 0.11620461940765381
          vf_loss: 1.4622892810238732
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,298,6993.94,298000,-42.222,-30.1,-58.5,422.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-21_21-42-22
  done: false
  episode_len_mean: 420.59
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.05900000000033
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 772
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3693933380974663
          entropy_coeff: 0.009999999999999998
          kl: 0.009762774221781564
          policy_loss: 0.046035656250185436
          total_loss: 1.3004276421335008
          vf_explained_var: 0.014069615863263607
          vf_loss: 1.26808589498202
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,299,7014.77,299000,-42.059,-30.1,-58.5,420.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-21_21-42-42
  done: false
  episode_len_mean: 420.69
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -42.06900000000032
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 2
  episodes_total: 774
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.325436744425032
          entropy_coeff: 0.009999999999999998
          kl: 0.009118221695303063
          policy_loss: -0.08555871645609538
          total_loss: 1.1201976352267795
          vf_explained_var: -0.2547319829463959
          vf_loss: 1.2190107157660854
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,300,7034.8,300000,-42.069,-30.1,-58.5,420.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-21_21-43-06
  done: false
  episode_len_mean: 418.81
  episode_media: {}
  episode_reward_max: -30.100000000000158
  episode_reward_mean: -41.881000000000334
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 777
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2518779701656766
          entropy_coeff: 0.009999999999999998
          kl: 0.007297204566362798
          policy_loss: 0.05335424509313372
          total_loss: 1.2424824608696832
          vf_explained_var: -0.2225457727909088
          vf_loss: 1.20164698412021
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,301,7058.08,301000,-41.881,-30.1,-58.5,418.81




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-21_21-43-47
  done: false
  episode_len_mean: 414.95
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -41.49500000000032
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 780
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3410990251435173
          entropy_coeff: 0.009999999999999998
          kl: 0.010931894666071112
          policy_loss: 0.0685877349641588
          total_loss: 1.306506856944826
          vf_explained_var: 0.08717302978038788
          vf_loss: 1.2513300933771663
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,302,7099.33,302000,-41.495,-29.9,-58.5,414.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-21_21-44-12
  done: false
  episode_len_mean: 410.13
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -41.01300000000031
  episode_reward_min: -58.50000000000056
  episodes_this_iter: 3
  episodes_total: 783
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2414496395323011
          entropy_coeff: 0.009999999999999998
          kl: 0.011319046380719221
          policy_loss: 0.05193276918596691
          total_loss: 1.2482741875780954
          vf_explained_var: -0.23418447375297546
          vf_loss: 1.2087558921840456
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,303,7124.67,303000,-41.013,-29.9,-58.5,410.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-21_21-44-38
  done: false
  episode_len_mean: 403.02
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -40.3020000000003
  episode_reward_min: -56.50000000000053
  episodes_this_iter: 3
  episodes_total: 786
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1703251587020027
          entropy_coeff: 0.009999999999999998
          kl: 0.005696593971661122
          policy_loss: 0.06141934411393272
          total_loss: 1.3320941146877077
          vf_explained_var: -0.27762866020202637
          vf_loss: 1.2823780229522121
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,304,7150.1,304000,-40.302,-29.9,-56.5,403.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-21_21-45-02
  done: false
  episode_len_mean: 396.67
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -39.667000000000286
  episode_reward_min: -56.50000000000053
  episodes_this_iter: 3
  episodes_total: 789
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0715282618999482
          entropy_coeff: 0.009999999999999998
          kl: 0.006229871023568171
          policy_loss: 0.059663723740312785
          total_loss: 1.255794060230255
          vf_explained_var: -0.20510177314281464
          vf_loss: 1.2068456304570039
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,305,7174.72,305000,-39.667,-29.9,-56.5,396.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-21_21-45-27
  done: false
  episode_len_mean: 391.41
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -39.14100000000028
  episode_reward_min: -55.60000000000052
  episodes_this_iter: 3
  episodes_total: 792
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0900014970037673
          entropy_coeff: 0.009999999999999998
          kl: 0.012634814812303653
          policy_loss: 0.07507652391990026
          total_loss: 1.1187980939944586
          vf_explained_var: 0.09214439243078232
          vf_loss: 1.05462157064014
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,306,7199.69,306000,-39.141,-29.9,-55.6,391.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-21_21-45-52
  done: false
  episode_len_mean: 385.5
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -38.55000000000028
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 3
  episodes_total: 795
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7637737300742189e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1189651979340447
          entropy_coeff: 0.009999999999999998
          kl: 0.020839629587038406
          policy_loss: 0.05195584926340315
          total_loss: 1.3796833720472124
          vf_explained_var: 0.04031634330749512
          vf_loss: 1.3389171317219735
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,307,7224.16,307000,-38.55,-29.9,-54,385.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-21_21-46-16
  done: false
  episode_len_mean: 383.16
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -38.31600000000028
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 2
  episodes_total: 797
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0185286038451724
          entropy_coeff: 0.009999999999999998
          kl: 0.018705376955409945
          policy_loss: -0.08918543143404854
          total_loss: 1.225391391913096
          vf_explained_var: -0.24495987594127655
          vf_loss: 1.3247620693511433
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,308,7248.28,308000,-38.316,-29.9,-54,383.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-21_21-46-38
  done: false
  episode_len_mean: 380.58
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -38.05800000000027
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 3
  episodes_total: 800
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0912331991725497
          entropy_coeff: 0.009999999999999998
          kl: 0.008737646042372937
          policy_loss: 0.007812878323925866
          total_loss: 1.2739001231061087
          vf_explained_var: 0.009929727762937546
          vf_loss: 1.2769995623164707
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,309,7270.49,309000,-38.058,-29.9,-54,380.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-21_21-47-02
  done: false
  episode_len_mean: 377.22
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -37.722000000000264
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 3
  episodes_total: 803
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0188238441944122
          entropy_coeff: 0.009999999999999998
          kl: 0.008587914216089487
          policy_loss: 0.04461161626709832
          total_loss: 1.3499193429946899
          vf_explained_var: -0.273867130279541
          vf_loss: 1.3154959319366348
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,310,7294.03,310000,-37.722,-29.9,-54,377.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-21_21-47-24
  done: false
  episode_len_mean: 373.49
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -37.34900000000026
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 3
  episodes_total: 806
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.956736300388972
          entropy_coeff: 0.009999999999999998
          kl: 0.007235898411939937
          policy_loss: 0.06605695320500268
          total_loss: 1.2649122122261258
          vf_explained_var: -0.2761319577693939
          vf_loss: 1.2084226136406262
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,311,7316.47,311000,-37.349,-29.9,-54,373.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-21_21-47-49
  done: false
  episode_len_mean: 368.09
  episode_media: {}
  episode_reward_max: -29.900000000000155
  episode_reward_mean: -36.80900000000025
  episode_reward_min: -54.0000000000005
  episodes_this_iter: 3
  episodes_total: 809
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9401773293813069
          entropy_coeff: 0.009999999999999998
          kl: 0.014930601505258906
          policy_loss: 0.06603458606534535
          total_loss: 1.1742079324192471
          vf_explained_var: -0.07383459806442261
          vf_loss: 1.1175750740700299
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,312,7341.63,312000,-36.809,-29.9,-54,368.09




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-21_21-48-37
  done: false
  episode_len_mean: 360.8
  episode_media: {}
  episode_reward_max: -27.300000000000118
  episode_reward_mean: -36.08000000000025
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 812
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8562033229404026
          entropy_coeff: 0.009999999999999998
          kl: 0.011911326818369754
          policy_loss: -0.11484871812992625
          total_loss: 1.7114461594157748
          vf_explained_var: 0.03988262638449669
          vf_loss: 1.834856887658437
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,313,7389.37,313000,-36.08,-27.3,-52.6,360.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-21_21-49-06
  done: false
  episode_len_mean: 352.79
  episode_media: {}
  episode_reward_max: -27.300000000000118
  episode_reward_mean: -35.27900000000024
  episode_reward_min: -47.2000000000004
  episodes_this_iter: 4
  episodes_total: 816
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8324240843454996
          entropy_coeff: 0.009999999999999998
          kl: 0.012869139706668323
          policy_loss: 0.025219031174977622
          total_loss: 1.577500593662262
          vf_explained_var: 0.041602395474910736
          vf_loss: 1.560605776309967
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,314,7418.29,314000,-35.279,-27.3,-47.2,352.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-21_21-49-35
  done: false
  episode_len_mean: 347.06
  episode_media: {}
  episode_reward_max: -27.300000000000118
  episode_reward_mean: -34.70600000000022
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 820
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.64566059511133e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5957733131117291
          entropy_coeff: 0.009999999999999998
          kl: 0.08604081712015249
          policy_loss: 0.026887422427535057
          total_loss: 1.4611086090405783
          vf_explained_var: 0.23713096976280212
          vf_loss: 1.4401786949899462
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,315,7447.19,315000,-34.706,-27.3,-43.4,347.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-21_21-50-08
  done: false
  episode_len_mean: 341.34
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -34.13400000000021
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 824
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.968490892666995e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5260367297463947
          entropy_coeff: 0.009999999999999998
          kl: 0.019337040066056313
          policy_loss: 0.006816422649555736
          total_loss: 1.1438985149065652
          vf_explained_var: 0.4659598767757416
          vf_loss: 1.142342370086246
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,316,7480.15,316000,-34.134,-24,-43.4,341.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-21_21-50-42
  done: false
  episode_len_mean: 335.59
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.55900000000021
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 828
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.968490892666995e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4925374703274833
          entropy_coeff: 0.009999999999999998
          kl: 0.08986873165431031
          policy_loss: 0.002550105212463273
          total_loss: 1.185277752743827
          vf_explained_var: 0.556969404220581
          vf_loss: 1.1876526766353184
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,317,7514.09,317000,-33.559,-23.9,-43.4,335.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-21_21-51-12
  done: false
  episode_len_mean: 332.85
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.2850000000002
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 831
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.95273633900049e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9173782560560438
          entropy_coeff: 0.009999999999999998
          kl: 0.10406357251111069
          policy_loss: -0.03052112725045946
          total_loss: 1.3186488681369357
          vf_explained_var: 0.43335914611816406
          vf_loss: 1.3583431714110905
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,318,7543.96,318000,-33.285,-23.9,-43.4,332.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-21_21-51-38
  done: false
  episode_len_mean: 332.76
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.2760000000002
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 834
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.92910450850074e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2178749746746487
          entropy_coeff: 0.009999999999999998
          kl: 0.025435249378870928
          policy_loss: -0.0038420503338178
          total_loss: 1.1226110802756415
          vf_explained_var: 0.14882801473140717
          vf_loss: 1.1386316796557772
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,319,7569.92,319000,-33.276,-23.9,-43.4,332.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-21_21-52-08
  done: false
  episode_len_mean: 331.69
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -33.1690000000002
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 837
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3393656762751107e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.189294132259157
          entropy_coeff: 0.009999999999999998
          kl: 0.05363140698734661
          policy_loss: -0.06009213055173556
          total_loss: 0.6335884857508871
          vf_explained_var: 0.6915670037269592
          vf_loss: 0.7055728415648143
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,320,7599.78,320000,-33.169,-23.9,-43.4,331.69




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-21_21-52-58
  done: false
  episode_len_mean: 328.79
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -32.8790000000002
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 841
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0090485144126656e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0449318998389774
          entropy_coeff: 0.009999999999999998
          kl: 0.03176888984398754
          policy_loss: 0.006235489911503262
          total_loss: 1.3358975066079033
          vf_explained_var: 0.5995218753814697
          vf_loss: 1.340110699998008
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,321,7649.78,321000,-32.879,-23.2,-43.4,328.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-21_21-53-31
  done: false
  episode_len_mean: 326.33
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -32.63300000000019
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 845
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.013572771618999e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2423254595862494
          entropy_coeff: 0.009999999999999998
          kl: 0.022810342273800504
          policy_loss: 0.11043223871125116
          total_loss: 0.554920431640413
          vf_explained_var: 0.8849270343780518
          vf_loss: 0.45691075921058655
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,322,7683.48,322000,-32.633,-23.2,-43.4,326.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-21_21-54-04
  done: false
  episode_len_mean: 322.49
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -32.249000000000194
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 849
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2976317789819505
          entropy_coeff: 0.009999999999999998
          kl: 0.012642738960536961
          policy_loss: -0.06900110219915707
          total_loss: 0.7323226028018528
          vf_explained_var: 0.6436589360237122
          vf_loss: 0.8142994599209892
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,323,7716,323000,-32.249,-23.2,-43.4,322.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-21_21-54-34
  done: false
  episode_len_mean: 321.18
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -32.11800000000019
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 852
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3234998967912461
          entropy_coeff: 0.009999999999999998
          kl: 0.009083128447028344
          policy_loss: -0.014787756734424166
          total_loss: 0.35220617883735233
          vf_explained_var: 0.8925947546958923
          vf_loss: 0.38022852424118253
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,324,7745.79,324000,-32.118,-23.2,-43.4,321.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-21_21-55-06
  done: false
  episode_len_mean: 316.78
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -31.67800000000018
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 856
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3539703090985615
          entropy_coeff: 0.009999999999999998
          kl: 0.018985157266336344
          policy_loss: 0.0853936289333635
          total_loss: 0.3691519526971711
          vf_explained_var: 0.9133407473564148
          vf_loss: 0.29729717440075343
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,325,7777.93,325000,-31.678,-23.2,-43.4,316.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-21_21-55-35
  done: false
  episode_len_mean: 313.76
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -31.376000000000182
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 860
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4442804124620225
          entropy_coeff: 0.009999999999999998
          kl: 0.010447827926012742
          policy_loss: -0.050350154149863456
          total_loss: 0.534388256404135
          vf_explained_var: 0.7967962622642517
          vf_loss: 0.5991807407803006
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,326,7807.28,326000,-31.376,-23.2,-43.4,313.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-21_21-56-05
  done: false
  episode_len_mean: 311.61
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -31.161000000000172
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 863
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4025785287221273
          entropy_coeff: 0.009999999999999998
          kl: 0.006689152722412296
          policy_loss: 0.08281108033325937
          total_loss: 0.5392203387286928
          vf_explained_var: 0.658979058265686
          vf_loss: 0.4704347410135799
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,327,7837.2,327000,-31.161,-23.2,-43.4,311.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-21_21-56-35
  done: false
  episode_len_mean: 309.39
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -30.93900000000017
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 4
  episodes_total: 867
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5898077408472697
          entropy_coeff: 0.009999999999999998
          kl: 0.009543856151461128
          policy_loss: -0.0019359487626287673
          total_loss: 0.4035760392745336
          vf_explained_var: 0.789831817150116
          vf_loss: 0.42140963971614837
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,328,7866.41,328000,-30.939,-23.2,-43.4,309.39




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-21_21-57-22
  done: false
  episode_len_mean: 306.15
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -30.615000000000165
  episode_reward_min: -43.40000000000035
  episodes_this_iter: 3
  episodes_total: 870
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.477461290359497
          entropy_coeff: 0.009999999999999998
          kl: 0.008508982249924833
          policy_loss: -0.11590699085758792
          total_loss: 0.4756460961368349
          vf_explained_var: 0.6026570200920105
          vf_loss: 0.6063273164961073
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,329,7913.46,329000,-30.615,-23.2,-43.4,306.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-21_21-57-52
  done: false
  episode_len_mean: 301.06
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -30.106000000000158
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 874
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4972680886586507
          entropy_coeff: 0.009999999999999998
          kl: 0.011489059823854574
          policy_loss: -0.03382199936442905
          total_loss: 0.6843865851561228
          vf_explained_var: 0.5391090512275696
          vf_loss: 0.7331807447804345
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,330,7943.43,330000,-30.106,-23.2,-39,301.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-21_21-58-22
  done: false
  episode_len_mean: 297.2
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -29.72000000000015
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 878
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4501887453926934
          entropy_coeff: 0.009999999999999998
          kl: 0.010174847131302735
          policy_loss: 0.0030440688961082033
          total_loss: 0.6351392537355423
          vf_explained_var: 0.5802954435348511
          vf_loss: 0.6465966191556719
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,331,7973.96,331000,-29.72,-23.2,-39,297.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-21_21-58-53
  done: false
  episode_len_mean: 295.74
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -29.574000000000147
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 881
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4515532321400113
          entropy_coeff: 0.009999999999999998
          kl: 0.011849231702160444
          policy_loss: -0.0464604039159086
          total_loss: 0.47502166595723894
          vf_explained_var: 0.7119272947311401
          vf_loss: 0.5359970649083455
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,332,8004.71,332000,-29.574,-23.2,-39,295.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-21_21-59-23
  done: false
  episode_len_mean: 292.72
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -29.272000000000148
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 885
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.384622457292345
          entropy_coeff: 0.009999999999999998
          kl: 0.023629190316819187
          policy_loss: 0.03521158778005176
          total_loss: 0.42404438033699987
          vf_explained_var: 0.7816483974456787
          vf_loss: 0.4026779453787539
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,333,8035.11,333000,-29.272,-23.2,-39,292.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-21_21-59-53
  done: false
  episode_len_mean: 290.09
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -29.009000000000146
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 889
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.780538736142746e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4170147524939642
          entropy_coeff: 0.009999999999999998
          kl: 0.009449879732732884
          policy_loss: 0.01338665419154697
          total_loss: 0.4642558604478836
          vf_explained_var: 0.8715695142745972
          vf_loss: 0.4650387164619234
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,334,8065.02,334000,-29.009,-23.2,-39,290.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-21_22-00-24
  done: false
  episode_len_mean: 288.2
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.820000000000135
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 892
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.780538736142746e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4320912082990012
          entropy_coeff: 0.009999999999999998
          kl: 0.00629755581483601
          policy_loss: -0.09453961609138382
          total_loss: 0.3459147072500653
          vf_explained_var: 0.8004315495491028
          vf_loss: 0.4547748121950361
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,335,8095.48,335000,-28.82,-23.2,-39,288.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-21_22-00-53
  done: false
  episode_len_mean: 285.59
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.559000000000133
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 896
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.780538736142746e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4768848882781134
          entropy_coeff: 0.009999999999999998
          kl: 0.013286671505583965
          policy_loss: 0.04080680815709962
          total_loss: 0.4475215117136637
          vf_explained_var: 0.8092970252037048
          vf_loss: 0.42148265341917673
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,336,8125.14,336000,-28.559,-23.2,-39,285.59




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-21_22-01-41
  done: false
  episode_len_mean: 281.5
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.150000000000126
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 900
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.780538736142746e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5804698626200357
          entropy_coeff: 0.009999999999999998
          kl: 0.02452789480850931
          policy_loss: 0.008662511015103923
          total_loss: 0.6862693025006188
          vf_explained_var: 0.5400355458259583
          vf_loss: 0.6934098309940762
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,337,8172.27,337000,-28.15,-23.2,-39,281.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-21_22-02-09
  done: false
  episode_len_mean: 279.62
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.962000000000124
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 903
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.6077605909771389
          entropy_coeff: 0.009999999999999998
          kl: 0.010980582219923226
          policy_loss: 0.07177937146690157
          total_loss: 0.6027902871370315
          vf_explained_var: 0.5004139542579651
          vf_loss: 0.5470873978402879
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,338,8200.51,338000,-27.962,-23.2,-39,279.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-21_22-02-34
  done: false
  episode_len_mean: 279.17
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.91700000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 906
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4927196926540798
          entropy_coeff: 0.009999999999999998
          kl: 0.011541283850327823
          policy_loss: 0.03470173246330685
          total_loss: 0.7170862502521939
          vf_explained_var: 0.4894913136959076
          vf_loss: 0.6973105423152447
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,339,8225.55,339000,-27.917,-23.2,-39,279.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-21_22-02-58
  done: false
  episode_len_mean: 279.11
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.911000000000126
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 909
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4913096719317966
          entropy_coeff: 0.009999999999999998
          kl: 0.009122028903505332
          policy_loss: -0.026296498709254795
          total_loss: 0.9320168667369418
          vf_explained_var: 0.1638856679201126
          vf_loss: 0.9732255450553364
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,340,8249.63,340000,-27.911,-23.2,-39,279.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-21_22-03-25
  done: false
  episode_len_mean: 279.99
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.999000000000127
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 912
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.3721428367826674
          entropy_coeff: 0.009999999999999998
          kl: 0.010840317962960337
          policy_loss: -0.09918638385004468
          total_loss: 0.9683221949471368
          vf_explained_var: 0.15733981132507324
          vf_loss: 1.0812289158503214
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,341,8277,341000,-27.999,-23.2,-39,279.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-21_22-03-52
  done: false
  episode_len_mean: 280.95
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.095000000000127
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 916
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.3454414447148642
          entropy_coeff: 0.009999999999999998
          kl: 0.008169600520509777
          policy_loss: 0.01568838871187634
          total_loss: 1.3932738807466296
          vf_explained_var: 0.08291001617908478
          vf_loss: 1.3910390549235874
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,342,8303.61,342000,-28.095,-23.2,-39,280.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-21_22-04-20
  done: false
  episode_len_mean: 281.59
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.159000000000134
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 919
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4199099593692355
          entropy_coeff: 0.009999999999999998
          kl: 0.011788576603593823
          policy_loss: 0.06449359390470717
          total_loss: 1.0364296095238792
          vf_explained_var: 0.1325312703847885
          vf_loss: 0.9861339030994309
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,343,8331.47,343000,-28.159,-23.2,-39,281.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-21_22-04-45
  done: false
  episode_len_mean: 283.73
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.37300000000014
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 922
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.3780855536460876
          entropy_coeff: 0.009999999999999998
          kl: 0.005465177443081615
          policy_loss: 0.041781810836659536
          total_loss: 1.0827784727017085
          vf_explained_var: 0.12567344307899475
          vf_loss: 1.0547769745190938
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,344,8356.92,344000,-28.373,-23.2,-39,283.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-21_22-05-12
  done: false
  episode_len_mean: 286.15
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.61500000000014
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 925
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4040437870555453
          entropy_coeff: 0.009999999999999998
          kl: 0.015455875482818933
          policy_loss: -0.006310581829812791
          total_loss: 1.0549676073922052
          vf_explained_var: 0.03410385921597481
          vf_loss: 1.0753170566426382
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,345,8383.42,345000,-28.615,-23.2,-39,286.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-21_22-05-38
  done: false
  episode_len_mean: 288.44
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.844000000000143
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 928
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4217235114839342
          entropy_coeff: 0.009999999999999998
          kl: 0.010610788764415464
          policy_loss: -0.10214045196771622
          total_loss: 1.471929915746053
          vf_explained_var: 0.04002610966563225
          vf_loss: 1.5882865111033122
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,346,8409.69,346000,-28.844,-23.2,-39,288.44




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-21_22-06-21
  done: false
  episode_len_mean: 289.85
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.98500000000014
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 931
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.3308428830570644
          entropy_coeff: 0.009999999999999998
          kl: 0.017698043663547935
          policy_loss: -0.09396748666961988
          total_loss: 1.246835023827023
          vf_explained_var: 0.009552163071930408
          vf_loss: 1.3541091408994463
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,347,8452.74,347000,-28.985,-23.2,-39,289.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-21_22-06-47
  done: false
  episode_len_mean: 289.6
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.960000000000147
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 934
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.4226286636458503
          entropy_coeff: 0.009999999999999998
          kl: 0.011129405799432234
          policy_loss: -0.03745279643270705
          total_loss: 1.0524812943405575
          vf_explained_var: 0.11998384445905685
          vf_loss: 1.1041592379411063
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,348,8478.46,348000,-28.96,-23.2,-36.5,289.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-21_22-07-12
  done: false
  episode_len_mean: 290.03
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -29.003000000000146
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 937
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.347348313861423
          entropy_coeff: 0.009999999999999998
          kl: 0.01592693856268938
          policy_loss: -0.05279058110382822
          total_loss: 1.3048140492704179
          vf_explained_var: 0.02737552858889103
          vf_loss: 1.371076503727171
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,349,8502.93,349000,-29.003,-23.2,-36.5,290.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-21_22-07-38
  done: false
  episode_len_mean: 292.08
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.208000000000148
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 940
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.2237131343947516
          entropy_coeff: 0.009999999999999998
          kl: 0.015453878403551693
          policy_loss: -0.10583973328272502
          total_loss: 1.4155811693933276
          vf_explained_var: 0.09623048454523087
          vf_loss: 1.533656448788113
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,350,8529.83,350000,-29.208,-23.9,-36.5,292.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-21_22-08-05
  done: false
  episode_len_mean: 294.0
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.400000000000144
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 944
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.2711287551456028
          entropy_coeff: 0.009999999999999998
          kl: 0.012586324161478995
          policy_loss: 0.006618854900201161
          total_loss: 1.4881207293934293
          vf_explained_var: 0.20040175318717957
          vf_loss: 1.4942118843396506
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,351,8556.1,351000,-29.4,-23.9,-36.5,294


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-21_22-08-28
  done: false
  episode_len_mean: 295.55
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.55500000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 947
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010170808104214116
          cur_lr: 5.000000000000001e-05
          entropy: 1.2315548221270243
          entropy_coeff: 0.009999999999999998
          kl: 0.021015442505784847
          policy_loss: 0.03935123433669408
          total_loss: 0.9625380535920461
          vf_explained_var: 0.3639385998249054
          vf_loss: 0.9355002007550663
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,352,8579.69,352000,-29.555,-23.9,-36.5,295.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-21_22-08-57
  done: false
  episode_len_mean: 295.91
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.59100000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 950
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.1336700201034546
          entropy_coeff: 0.009999999999999998
          kl: 0.008902277106425258
          policy_loss: 0.006865760518444909
          total_loss: 0.8144388483630286
          vf_explained_var: 0.4058471620082855
          vf_loss: 0.8189084306359291
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,353,8608.55,353000,-29.591,-23.9,-36.5,295.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-21_22-09-27
  done: false
  episode_len_mean: 295.54
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.55400000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 954
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.1862961914804246
          entropy_coeff: 0.009999999999999998
          kl: 0.007963098690930634
          policy_loss: 0.02195149486263593
          total_loss: 0.8534133904510074
          vf_explained_var: 0.5919560194015503
          vf_loss: 0.8433236479759216
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,354,8637.91,354000,-29.554,-23.9,-36.5,295.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-21_22-09-56
  done: false
  episode_len_mean: 295.89
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.589000000000155
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 958
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.052207440800137
          entropy_coeff: 0.009999999999999998
          kl: 0.0052080768933494975
          policy_loss: -0.0021350924339559344
          total_loss: 0.8678399840990703
          vf_explained_var: 0.5968984961509705
          vf_loss: 0.8804963476128048
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,355,8667.57,355000,-29.589,-23.9,-36.5,295.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-21_22-10-44
  done: false
  episode_len_mean: 295.25
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.52500000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 961
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.1453207373619079
          entropy_coeff: 0.009999999999999998
          kl: 0.007924504522162618
          policy_loss: -0.01778210219409731
          total_loss: 0.6562901298205058
          vf_explained_var: 0.6531115174293518
          vf_loss: 0.6855242313610183
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,356,8715.05,356000,-29.525,-23.9,-36.5,295.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-21_22-11-12
  done: false
  episode_len_mean: 295.18
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.51800000000014
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 965
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.1412209431330362
          entropy_coeff: 0.009999999999999998
          kl: 0.00594739174500888
          policy_loss: -0.031286406185891895
          total_loss: 0.82181532714102
          vf_explained_var: 0.6318408250808716
          vf_loss: 0.8645130382643805
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,357,8743.1,357000,-29.518,-23.9,-36.5,295.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-21_22-11-40
  done: false
  episode_len_mean: 295.56
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -29.55600000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 968
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.201556098461151
          entropy_coeff: 0.009999999999999998
          kl: 0.008784720856795285
          policy_loss: 0.02979764441649119
          total_loss: 0.6042763984865612
          vf_explained_var: 0.7100183963775635
          vf_loss: 0.5864929700477256
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,358,8771.63,358000,-29.556,-23.9,-36.5,295.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-21_22-12-07
  done: false
  episode_len_mean: 296.58
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -29.658000000000147
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 971
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.0890535195668538
          entropy_coeff: 0.009999999999999998
          kl: 0.008521226729147853
          policy_loss: -0.12378871817555692
          total_loss: 0.7611627750926547
          vf_explained_var: 0.5576733946800232
          vf_loss: 0.8958407176865472
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,359,8797.88,359000,-29.658,-24.1,-36.5,296.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-21_22-12-34
  done: false
  episode_len_mean: 298.01
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -29.801000000000155
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 975
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.0988071812523736
          entropy_coeff: 0.009999999999999998
          kl: 0.0056667182919238595
          policy_loss: -0.01969562710987197
          total_loss: 0.9338116771645016
          vf_explained_var: 0.593163788318634
          vf_loss: 0.9644945283730825
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,360,8825.19,360000,-29.801,-24.1,-36.5,298.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-21_22-13-03
  done: false
  episode_len_mean: 298.56
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -29.856000000000154
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 978
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.0709634410010445
          entropy_coeff: 0.009999999999999998
          kl: 0.012176087564990072
          policy_loss: 0.09888496945301692
          total_loss: 0.7701491481728024
          vf_explained_var: 0.6093651056289673
          vf_loss: 0.6819719525261058
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,361,8854.23,361000,-29.856,-24.1,-36.5,298.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-21_22-13-28
  done: false
  episode_len_mean: 299.5
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -29.95000000000016
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 982
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.0221653044223786
          entropy_coeff: 0.009999999999999998
          kl: 0.011475956010624106
          policy_loss: -0.030712746332089105
          total_loss: 0.9835876047611236
          vf_explained_var: 0.5325949788093567
          vf_loss: 1.024520277314716
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,362,8879.57,362000,-29.95,-24.1,-36.5,299.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-21_22-13-57
  done: false
  episode_len_mean: 300.27
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -30.027000000000157
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 985
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.092000883155399
          entropy_coeff: 0.009999999999999998
          kl: 0.007091649397269685
          policy_loss: -0.011364800731341045
          total_loss: 0.6564508352014754
          vf_explained_var: 0.727388858795166
          vf_loss: 0.6787345622148778
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,363,8907.74,363000,-30.027,-24.1,-36.5,300.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-21_22-14-27
  done: false
  episode_len_mean: 300.22
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.02200000000016
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 989
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015256212156321177
          cur_lr: 5.000000000000001e-05
          entropy: 1.0558401472038692
          entropy_coeff: 0.009999999999999998
          kl: 0.034657445942389765
          policy_loss: 0.019991603742043177
          total_loss: 1.4030467523468866
          vf_explained_var: 0.34681007266044617
          vf_loss: 1.3936082601547242
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,364,8937.63,364000,-30.022,-22.8,-36.5,300.22




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-21_22-15-12
  done: false
  episode_len_mean: 300.57
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.05700000000016
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 992
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00022884318234481766
          cur_lr: 5.000000000000001e-05
          entropy: 1.1355489048692915
          entropy_coeff: 0.009999999999999998
          kl: 0.03089660843618063
          policy_loss: -0.0020501285791397093
          total_loss: 0.7701232297552957
          vf_explained_var: 0.46085047721862793
          vf_loss: 0.7835217742456331
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,365,8982.78,365000,-30.057,-22.8,-36.5,300.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-21_22-15-41
  done: false
  episode_len_mean: 300.88
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.08800000000016
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 996
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003432647735172266
          cur_lr: 5.000000000000001e-05
          entropy: 1.0692524194717408
          entropy_coeff: 0.009999999999999998
          kl: 0.021981258131676402
          policy_loss: 0.041610548479689494
          total_loss: 1.6069520248307123
          vf_explained_var: 0.26457101106643677
          vf_loss: 1.5760264568858677
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,366,9012.14,366000,-30.088,-22.8,-36.5,300.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-21_22-16-10
  done: false
  episode_len_mean: 300.7
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.070000000000164
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 999
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005148971602758397
          cur_lr: 5.000000000000001e-05
          entropy: 1.0019939283529917
          entropy_coeff: 0.009999999999999998
          kl: 0.023919688012222513
          policy_loss: -0.09991262298491266
          total_loss: 1.1133925120035808
          vf_explained_var: 0.5036477446556091
          vf_loss: 1.2233127494653067
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,367,9041.24,367000,-30.07,-22.8,-36.5,300.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-21_22-16-39
  done: false
  episode_len_mean: 300.54
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.054000000000155
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1003
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007723457404137598
          cur_lr: 5.000000000000001e-05
          entropy: 1.3686689880159166
          entropy_coeff: 0.009999999999999998
          kl: 0.06514108328907263
          policy_loss: 0.012195799085828993
          total_loss: 1.2926063961452907
          vf_explained_var: 0.41184285283088684
          vf_loss: 1.2940469708707598
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,368,9070.31,368000,-30.054,-22.8,-36.5,300.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-21_22-17-10
  done: false
  episode_len_mean: 298.07
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.80700000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1007
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011585186106206396
          cur_lr: 5.000000000000001e-05
          entropy: 1.3975760221481324
          entropy_coeff: 0.009999999999999998
          kl: 0.023586546855180836
          policy_loss: 0.008992605490816964
          total_loss: 1.3143657955858443
          vf_explained_var: 0.41959521174430847
          vf_loss: 1.3193216178152296
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,369,9100.96,369000,-29.807,-22.8,-36.5,298.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-21_22-17-39
  done: false
  episode_len_mean: 296.73
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.67300000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 3
  episodes_total: 1010
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 1.3359788444307115
          entropy_coeff: 0.009999999999999998
          kl: 0.010662930929835094
          policy_loss: -0.076257633500629
          total_loss: 1.117837549580468
          vf_explained_var: 0.4774535000324249
          vf_loss: 1.2074364443620047
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,370,9129.84,370000,-29.673,-22.8,-36.5,296.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-21_22-18-09
  done: false
  episode_len_mean: 294.97
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.49700000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1014
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 1.0829336199495527
          entropy_coeff: 0.009999999999999998
          kl: 0.013475672852867425
          policy_loss: 0.02448583423263497
          total_loss: 0.75176662935151
          vf_explained_var: 0.7424885034561157
          vf_loss: 0.7380867153406143
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,371,9159.9,371000,-29.497,-22.8,-36.5,294.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-21_22-18-38
  done: false
  episode_len_mean: 293.02
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.30200000000015
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1018
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 1.0053584178288777
          entropy_coeff: 0.009999999999999998
          kl: 0.017081237005039756
          policy_loss: 0.11367158099181122
          total_loss: 0.6930967417028215
          vf_explained_var: 0.8513815999031067
          vf_loss: 0.5894490665859646
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,372,9188.44,372000,-29.302,-22.8,-36.5,293.02




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-21_22-19-28
  done: false
  episode_len_mean: 290.2
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.020000000000145
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1022
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 0.9196890989939371
          entropy_coeff: 0.009999999999999998
          kl: 0.03613052975336923
          policy_loss: 0.07079459573659631
          total_loss: 0.8578210757838355
          vf_explained_var: 0.7976266741752625
          vf_loss: 0.7961605866750081
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,373,9238.99,373000,-29.02,-22.3,-36.5,290.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-21_22-20-00
  done: false
  episode_len_mean: 287.33
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -28.733000000000132
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1026
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002606666873896439
          cur_lr: 5.000000000000001e-05
          entropy: 0.7889570501115587
          entropy_coeff: 0.009999999999999998
          kl: 0.01027213794626641
          policy_loss: 0.04200904412815968
          total_loss: 0.8449833571910859
          vf_explained_var: 0.6810967922210693
          vf_loss: 0.8108371059099834
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,374,9271.06,374000,-28.733,-22.3,-36.5,287.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-21_22-20-32
  done: false
  episode_len_mean: 284.71
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -28.471000000000135
  episode_reward_min: -36.50000000000025
  episodes_this_iter: 4
  episodes_total: 1030
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002606666873896439
          cur_lr: 5.000000000000001e-05
          entropy: 0.9427842650148603
          entropy_coeff: 0.009999999999999998
          kl: 0.03764707342441827
          policy_loss: 0.08618287282685438
          total_loss: 0.6187433862023883
          vf_explained_var: 0.8216940760612488
          vf_loss: 0.541890220840772
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,375,9302.89,375000,-28.471,-22.3,-36.5,284.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-21_22-21-04
  done: false
  episode_len_mean: 281.1
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -28.110000000000138
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 1034
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0039100003108446585
          cur_lr: 5.000000000000001e-05
          entropy: 0.7418659110864003
          entropy_coeff: 0.009999999999999998
          kl: 0.012653796455649156
          policy_loss: 0.05167713227371375
          total_loss: 0.42915444638994005
          vf_explained_var: 0.8734322190284729
          vf_loss: 0.3848465002245373
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,376,9334.93,376000,-28.11,-22.3,-34.9,281.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-21_22-21-36
  done: false
  episode_len_mean: 277.85
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -27.785000000000124
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1038
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0039100003108446585
          cur_lr: 5.000000000000001e-05
          entropy: 0.7670431554317474
          entropy_coeff: 0.009999999999999998
          kl: 0.02526389068020794
          policy_loss: -0.0014330839945210351
          total_loss: 0.39179431630505457
          vf_explained_var: 0.819553017616272
          vf_loss: 0.400799051589436
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,377,9366.6,377000,-27.785,-22.3,-33.8,277.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-21_22-22-07
  done: false
  episode_len_mean: 275.78
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -27.578000000000117
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1042
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005865000466266987
          cur_lr: 5.000000000000001e-05
          entropy: 0.5857504930761125
          entropy_coeff: 0.009999999999999998
          kl: 0.011495410993043404
          policy_loss: 0.01562040936615732
          total_loss: 0.46373454646931755
          vf_explained_var: 0.8415083885192871
          vf_loss: 0.453904218143887
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,378,9397.73,378000,-27.578,-22.3,-33.8,275.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-21_22-22-39
  done: false
  episode_len_mean: 273.07
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -27.307000000000116
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1046
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005865000466266987
          cur_lr: 5.000000000000001e-05
          entropy: 0.5460889703697629
          entropy_coeff: 0.009999999999999998
          kl: 0.009597897012866478
          policy_loss: 0.0530523012081782
          total_loss: 0.49770041008790333
          vf_explained_var: 0.759785532951355
          vf_loss: 0.45005270540714265
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,379,9430.21,379000,-27.307,-22.3,-33.8,273.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-21_22-23-29
  done: false
  episode_len_mean: 270.73
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.073000000000114
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1050
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005865000466266987
          cur_lr: 5.000000000000001e-05
          entropy: 0.5358085152175691
          entropy_coeff: 0.009999999999999998
          kl: 0.00892673331532876
          policy_loss: 0.02721479812430011
          total_loss: 0.578034884399838
          vf_explained_var: 0.6727744340896606
          vf_loss: 0.5561258114046521
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,380,9479.7,380000,-27.073,-21.7,-33.8,270.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-21_22-23-59
  done: false
  episode_len_mean: 270.51
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.051000000000116
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1054
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005865000466266987
          cur_lr: 5.000000000000001e-05
          entropy: 0.6987277560763889
          entropy_coeff: 0.009999999999999998
          kl: 0.010801076054279513
          policy_loss: 0.02607922711306148
          total_loss: 1.0570872498883142
          vf_explained_var: 0.19142889976501465
          vf_loss: 1.0379319528738657
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,381,9509.7,381000,-27.051,-21.7,-33.8,270.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-21_22-24-28
  done: false
  episode_len_mean: 270.64
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.06400000000011
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 3
  episodes_total: 1057
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005865000466266987
          cur_lr: 5.000000000000001e-05
          entropy: 0.6861038830545213
          entropy_coeff: 0.009999999999999998
          kl: 0.002825647163222279
          policy_loss: -0.011101550857226054
          total_loss: 0.8319022483295865
          vf_explained_var: 0.2121839076280594
          vf_loss: 0.8498482724030813
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,382,9539.01,382000,-27.064,-21.7,-33.8,270.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-21_22-24-58
  done: false
  episode_len_mean: 270.82
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.08200000000012
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1061
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029325002331334936
          cur_lr: 5.000000000000001e-05
          entropy: 0.6159112373987834
          entropy_coeff: 0.009999999999999998
          kl: 0.011153020092875584
          policy_loss: 0.00920323512206475
          total_loss: 1.1083109027809568
          vf_explained_var: 0.23142971098423004
          vf_loss: 1.105234060022566
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,383,9569.11,383000,-27.082,-21.7,-33.8,270.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-21_22-25-29
  done: false
  episode_len_mean: 269.72
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.972000000000122
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1065
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029325002331334936
          cur_lr: 5.000000000000001e-05
          entropy: 0.6071046829223633
          entropy_coeff: 0.009999999999999998
          kl: 0.006414679726513839
          policy_loss: 0.02418395835492346
          total_loss: 1.1740502132309807
          vf_explained_var: 0.19933584332466125
          vf_loss: 1.1559184935357836
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,384,9599.78,384000,-26.972,-21.7,-33.8,269.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-21_22-26-01
  done: false
  episode_len_mean: 268.09
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.80900000000011
  episode_reward_min: -33.80000000000021
  episodes_this_iter: 4
  episodes_total: 1069
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029325002331334936
          cur_lr: 5.000000000000001e-05
          entropy: 0.515431191192733
          entropy_coeff: 0.009999999999999998
          kl: 0.008898728692489281
          policy_loss: 0.019183631365497906
          total_loss: 1.2093916985723707
          vf_explained_var: 0.12004388123750687
          vf_loss: 1.19533627960417
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,385,9631.86,385000,-26.809,-21.7,-33.8,268.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-21_22-26-33
  done: false
  episode_len_mean: 265.77
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.57700000000011
  episode_reward_min: -30.400000000000162
  episodes_this_iter: 4
  episodes_total: 1073
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029325002331334936
          cur_lr: 5.000000000000001e-05
          entropy: 0.4344472391737832
          entropy_coeff: 0.009999999999999998
          kl: 0.009997459718046706
          policy_loss: 0.02164878414736854
          total_loss: 0.9835465470949809
          vf_explained_var: 0.3696337938308716
          vf_loss: 0.9662129196855757
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,386,9663.21,386000,-26.577,-21.7,-30.4,265.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-21_22-27-04
  done: false
  episode_len_mean: 264.35
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.435000000000105
  episode_reward_min: -30.400000000000162
  episodes_this_iter: 4
  episodes_total: 1077
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029325002331334936
          cur_lr: 5.000000000000001e-05
          entropy: 0.46165819234318206
          entropy_coeff: 0.009999999999999998
          kl: 0.00451860547930999
          policy_loss: 0.02107244556148847
          total_loss: 1.1438211308585273
          vf_explained_var: 0.2535812556743622
          vf_loss: 1.127351995309194
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,387,9694.69,387000,-26.435,-21.7,-30.4,264.35




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-21_22-27-51
  done: false
  episode_len_mean: 262.55
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.2550000000001
  episode_reward_min: -31.00000000000017
  episodes_this_iter: 4
  episodes_total: 1081
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014662501165667468
          cur_lr: 5.000000000000001e-05
          entropy: 0.4756122565931744
          entropy_coeff: 0.009999999999999998
          kl: 0.008046816860641412
          policy_loss: 0.04335730274518331
          total_loss: 1.0010195639398363
          vf_explained_var: 0.4457906186580658
          vf_loss: 0.9624065889252557
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,388,9741.59,388000,-26.255,-21.7,-31,262.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-21_22-28-16
  done: false
  episode_len_mean: 263.41
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.3410000000001
  episode_reward_min: -33.70000000000021
  episodes_this_iter: 3
  episodes_total: 1084
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014662501165667468
          cur_lr: 5.000000000000001e-05
          entropy: 0.870887405342526
          entropy_coeff: 0.009999999999999998
          kl: 0.012001850847149914
          policy_loss: 0.04668962367706829
          total_loss: 1.2155559367603725
          vf_explained_var: -0.27431735396385193
          vf_loss: 1.1775575923422972
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,389,9766.2,389000,-26.341,-21.7,-33.7,263.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-21_22-28-40
  done: false
  episode_len_mean: 264.29
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.429000000000105
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 3
  episodes_total: 1087
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014662501165667468
          cur_lr: 5.000000000000001e-05
          entropy: 0.9809828950299158
          entropy_coeff: 0.009999999999999998
          kl: 0.00492061399191799
          policy_loss: 0.04218070225583182
          total_loss: 1.214306253194809
          vf_explained_var: -0.0710776150226593
          vf_loss: 1.1819281720452839
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,390,9790.86,390000,-26.429,-21.7,-34,264.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-21_22-29-05
  done: false
  episode_len_mean: 266.81
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.68100000000011
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 3
  episodes_total: 1090
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007331250582833734
          cur_lr: 5.000000000000001e-05
          entropy: 1.0261392652988435
          entropy_coeff: 0.009999999999999998
          kl: 0.005799273135881972
          policy_loss: 0.04726829148001141
          total_loss: 1.2533286535077626
          vf_explained_var: 0.13416321575641632
          vf_loss: 1.2163175005879667
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,391,9815.15,391000,-26.681,-21.7,-37.2,266.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-21_22-29-33
  done: false
  episode_len_mean: 266.93
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.69300000000011
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 3
  episodes_total: 1093
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007331250582833734
          cur_lr: 5.000000000000001e-05
          entropy: 0.8658433516820272
          entropy_coeff: 0.009999999999999998
          kl: 0.015206162744362138
          policy_loss: 0.04331044289800856
          total_loss: 0.9059539927376641
          vf_explained_var: 0.36495348811149597
          vf_loss: 0.8712908461689949
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,392,9843.41,392000,-26.693,-21.7,-37.2,266.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-21_22-30-05
  done: false
  episode_len_mean: 266.16
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.616000000000113
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1097
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007331250582833734
          cur_lr: 5.000000000000001e-05
          entropy: 0.5798366281721327
          entropy_coeff: 0.009999999999999998
          kl: 0.009295379294208633
          policy_loss: -0.018128156330850388
          total_loss: 1.0914718826611838
          vf_explained_var: 0.44821858406066895
          vf_loss: 1.115391590197881
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,393,9875.07,393000,-26.616,-21.7,-37.2,266.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-21_22-30-36
  done: false
  episode_len_mean: 264.71
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.471000000000107
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1101
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007331250582833734
          cur_lr: 5.000000000000001e-05
          entropy: 0.7575691878795624
          entropy_coeff: 0.009999999999999998
          kl: 0.011542705049398232
          policy_loss: -0.030532003276877932
          total_loss: 1.2515909963183933
          vf_explained_var: 0.3686930239200592
          vf_loss: 1.2896902309523688
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,394,9906.34,394000,-26.471,-21.7,-37.2,264.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-21_22-31-08
  done: false
  episode_len_mean: 263.94
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.39400000000011
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1105
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007331250582833734
          cur_lr: 5.000000000000001e-05
          entropy: 0.5724734173880683
          entropy_coeff: 0.009999999999999998
          kl: 0.0033518938312434602
          policy_loss: 0.005943586677312851
          total_loss: 0.9769468281004164
          vf_explained_var: 0.529727041721344
          vf_loss: 0.976725529299842
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,395,9938.26,395000,-26.394,-21.7,-37.2,263.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-21_22-31-40
  done: false
  episode_len_mean: 263.19
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.319000000000106
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1109
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.4944779545068741
          entropy_coeff: 0.009999999999999998
          kl: 0.01255908897696224
          policy_loss: -0.038439922365877364
          total_loss: 1.010830690463384
          vf_explained_var: 0.5070642828941345
          vf_loss: 1.0542107820510864
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,396,9970.21,396000,-26.319,-21.7,-37.2,263.19




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-21_22-32-30
  done: false
  episode_len_mean: 261.95
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.195000000000103
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1113
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.5269876036379072
          entropy_coeff: 0.009999999999999998
          kl: 0.00816913036792444
          policy_loss: -0.04911221067110697
          total_loss: 1.056698148449262
          vf_explained_var: 0.5424226522445679
          vf_loss: 1.1110772444142236
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,397,10020.4,397000,-26.195,-21.7,-37.2,261.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-21_22-33-02
  done: false
  episode_len_mean: 261.63
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.163000000000103
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1117
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.5858693851364983
          entropy_coeff: 0.009999999999999998
          kl: 0.0077590359907250194
          policy_loss: -0.033343168679210874
          total_loss: 1.1856092366907331
          vf_explained_var: 0.5033721923828125
          vf_loss: 1.2248082558314006
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,398,10052.1,398000,-26.163,-21.7,-37.2,261.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-21_22-33-31
  done: false
  episode_len_mean: 261.68
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.168000000000102
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1121
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.6092489937941233
          entropy_coeff: 0.009999999999999998
          kl: 0.01371884884791999
          policy_loss: -0.023232551084624395
          total_loss: 1.297240138053894
          vf_explained_var: 0.5181635618209839
          vf_loss: 1.3265601568751866
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,399,10081.2,399000,-26.168,-21.7,-37.2,261.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-21_22-34-01
  done: false
  episode_len_mean: 262.17
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.217000000000102
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1125
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.6729780190520817
          entropy_coeff: 0.009999999999999998
          kl: 0.005542664491666945
          policy_loss: 0.028342467463678783
          total_loss: 1.5812243819236755
          vf_explained_var: 0.2814604938030243
          vf_loss: 1.5596096634864807
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,400,10111.2,400000,-26.217,-21.7,-37.2,262.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-21_22-34-31
  done: false
  episode_len_mean: 262.54
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.2540000000001
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1129
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.8655031773779127
          entropy_coeff: 0.009999999999999998
          kl: 0.014945061099215342
          policy_loss: 0.010154708060953352
          total_loss: 1.6193395084804958
          vf_explained_var: 0.22221890091896057
          vf_loss: 1.6178343507978652
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,401,10141.6,401000,-26.254,-21.7,-37.2,262.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-21_22-35-01
  done: false
  episode_len_mean: 263.11
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.311000000000107
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1133
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.6857833431826698
          entropy_coeff: 0.009999999999999998
          kl: 0.01152772409379455
          policy_loss: 0.012272803526785638
          total_loss: 1.4590593311521742
          vf_explained_var: 0.3206535577774048
          vf_loss: 1.4536401430765789
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,402,10171.6,402000,-26.311,-21.7,-37.2,263.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-21_22-35-33
  done: false
  episode_len_mean: 263.07
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.30700000000011
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1137
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003665625291416867
          cur_lr: 5.000000000000001e-05
          entropy: 0.9351480020417108
          entropy_coeff: 0.009999999999999998
          kl: 0.05859510650753737
          policy_loss: 0.0034821088115374248
          total_loss: 1.1251408060391743
          vf_explained_var: 0.4628424346446991
          vf_loss: 1.1309887137677934
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,403,10203.6,403000,-26.307,-21.7,-37.2,263.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-21_22-36-23
  done: false
  episode_len_mean: 262.74
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.2740000000001
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1141
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00054984379371253
          cur_lr: 5.000000000000001e-05
          entropy: 0.8712061756187015
          entropy_coeff: 0.009999999999999998
          kl: 0.012348585936097656
          policy_loss: 0.028458892471260495
          total_loss: 0.9348155286577012
          vf_explained_var: 0.6613308191299438
          vf_loss: 0.9150619089603425
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,404,10253.3,404000,-26.274,-21.7,-37.2,262.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-21_22-36-55
  done: false
  episode_len_mean: 262.96
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.296000000000102
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1145
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00054984379371253
          cur_lr: 5.000000000000001e-05
          entropy: 0.8552860584523942
          entropy_coeff: 0.009999999999999998
          kl: 0.009419397586707765
          policy_loss: 0.004648650520377689
          total_loss: 0.8900395254294078
          vf_explained_var: 0.6531009078025818
          vf_loss: 0.8939385460482703
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,405,10285.4,405000,-26.296,-21.7,-37.2,262.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-21_22-37-27
  done: false
  episode_len_mean: 263.35
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.335000000000104
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1149
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00054984379371253
          cur_lr: 5.000000000000001e-05
          entropy: 0.9178819239139557
          entropy_coeff: 0.009999999999999998
          kl: 0.021985494126081858
          policy_loss: 0.027764148140947024
          total_loss: 1.1823026425308651
          vf_explained_var: 0.48308807611465454
          vf_loss: 1.1637052363819547
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,406,10317.3,406000,-26.335,-21.7,-37.2,263.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-21_22-37-59
  done: false
  episode_len_mean: 263.48
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.348000000000106
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 3
  episodes_total: 1152
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008247656905687951
          cur_lr: 5.000000000000001e-05
          entropy: 0.9779848847124312
          entropy_coeff: 0.009999999999999998
          kl: 0.008430644972971715
          policy_loss: -0.07708997685048315
          total_loss: 0.9831617686483595
          vf_explained_var: 0.4670175313949585
          vf_loss: 1.0700246420171526
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,407,10348.6,407000,-26.348,-21.7,-37.2,263.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-21_22-38-28
  done: false
  episode_len_mean: 262.97
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.297000000000104
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1156
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008247656905687951
          cur_lr: 5.000000000000001e-05
          entropy: 0.7741833044422998
          entropy_coeff: 0.009999999999999998
          kl: 0.004599537085530218
          policy_loss: 0.015695792188247047
          total_loss: 1.0353120936287774
          vf_explained_var: 0.5220852494239807
          vf_loss: 1.0273543463812933
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,408,10378.1,408000,-26.297,-21.7,-37.2,262.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-21_22-39-01
  done: false
  episode_len_mean: 261.53
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.153000000000098
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1160
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00041238284528439753
          cur_lr: 5.000000000000001e-05
          entropy: 0.6402273452944226
          entropy_coeff: 0.009999999999999998
          kl: 0.005652256365634243
          policy_loss: -0.1005648566616906
          total_loss: 1.165482434961531
          vf_explained_var: 0.449109822511673
          vf_loss: 1.2724472218089633
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,409,10410.8,409000,-26.153,-21.7,-37.2,261.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-21_22-39-33
  done: false
  episode_len_mean: 261.05
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.105000000000103
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1164
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00041238284528439753
          cur_lr: 5.000000000000001e-05
          entropy: 0.6066089303957092
          entropy_coeff: 0.009999999999999998
          kl: 0.05862972429580206
          policy_loss: -0.09749728350175751
          total_loss: 1.3873001323805916
          vf_explained_var: 0.4056794345378876
          vf_loss: 1.4908393243948619
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,410,10443.4,410000,-26.105,-21.7,-37.2,261.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-21_22-40-08
  done: false
  episode_len_mean: 260.25
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.0250000000001
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 5
  episodes_total: 1169
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006185742679265963
          cur_lr: 5.000000000000001e-05
          entropy: 0.564583131339815
          entropy_coeff: 0.009999999999999998
          kl: 0.005916095930744644
          policy_loss: -0.010129951851235495
          total_loss: 1.095634291569392
          vf_explained_var: 0.32357051968574524
          vf_loss: 1.1114064077536265
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,411,10478.1,411000,-26.025,-21.6,-37.2,260.25




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-21_22-41-04
  done: false
  episode_len_mean: 258.07
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.807000000000098
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 5
  episodes_total: 1174
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006185742679265963
          cur_lr: 5.000000000000001e-05
          entropy: 0.4459176942706108
          entropy_coeff: 0.009999999999999998
          kl: 0.01501879118484592
          policy_loss: 0.010718412573138873
          total_loss: 1.502256617281172
          vf_explained_var: 0.17942218482494354
          vf_loss: 1.4959880669911703
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,412,10533.9,412000,-25.807,-18.7,-37.2,258.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-21_22-41-39
  done: false
  episode_len_mean: 257.27
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.727000000000103
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1178
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006185742679265963
          cur_lr: 5.000000000000001e-05
          entropy: 0.6727374729182986
          entropy_coeff: 0.009999999999999998
          kl: 0.0089509183756072
          policy_loss: 0.013298747522963419
          total_loss: 1.367499221695794
          vf_explained_var: -0.0046303411945700645
          vf_loss: 1.3609223193592495
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,413,10568.8,413000,-25.727,-18.7,-37.2,257.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-21_22-42-13
  done: false
  episode_len_mean: 255.39
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.53900000000009
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 4
  episodes_total: 1182
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006185742679265963
          cur_lr: 5.000000000000001e-05
          entropy: 0.643723929921786
          entropy_coeff: 0.009999999999999998
          kl: 0.00802136566886923
          policy_loss: 0.002426755428314209
          total_loss: 1.2218405114279853
          vf_explained_var: 0.02355221100151539
          vf_loss: 1.2258460296524896
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,414,10603.3,414000,-25.539,-18.7,-37.2,255.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-21_22-42-49
  done: false
  episode_len_mean: 250.54
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.054000000000087
  episode_reward_min: -37.20000000000026
  episodes_this_iter: 5
  episodes_total: 1187
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006185742679265963
          cur_lr: 5.000000000000001e-05
          entropy: 0.27477914326720765
          entropy_coeff: 0.009999999999999998
          kl: 0.003889925374541134
          policy_loss: -0.011835000167290369
          total_loss: 1.7952263010872735
          vf_explained_var: 0.01911274343729019
          vf_loss: 1.8098066727320352
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,415,10639.4,415000,-25.054,-18.7,-37.2,250.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-21_22-43-23
  done: false
  episode_len_mean: 246.16
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.61600000000008
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 1191
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00030928713396329813
          cur_lr: 5.000000000000001e-05
          entropy: 0.425670799281862
          entropy_coeff: 0.009999999999999998
          kl: 0.00401195178627276
          policy_loss: 0.013974102503723568
          total_loss: 1.1384229825602636
          vf_explained_var: 0.08757952600717545
          vf_loss: 1.1287043511867523
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,416,10673.3,416000,-24.616,-18.7,-31.5,246.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-21_22-43-57
  done: false
  episode_len_mean: 243.98
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.398000000000074
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 5
  episodes_total: 1196
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015464356698164907
          cur_lr: 5.000000000000001e-05
          entropy: 0.43377182748582627
          entropy_coeff: 0.009999999999999998
          kl: 0.009106326038257073
          policy_loss: -0.0035786520275804734
          total_loss: 1.7407362818717957
          vf_explained_var: 0.04520098865032196
          vf_loss: 1.7486512382825217
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 4170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,417,10706.9,417000,-24.398,-18.7,-29.5,243.98




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-21_22-44-49
  done: false
  episode_len_mean: 242.42
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.242000000000075
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 4
  episodes_total: 1200
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015464356698164907
          cur_lr: 5.000000000000001e-05
          entropy: 0.46796763837337496
          entropy_coeff: 0.009999999999999998
          kl: 0.004236264125836521
          policy_loss: -0.022378923495610555
          total_loss: 1.2296965572569105
          vf_explained_var: 0.1603311449289322
          vf_loss: 1.2567544870906406
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,418,10759.1,418000,-24.242,-18.7,-29.5,242.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-21_22-45-26
  done: false
  episode_len_mean: 240.9
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.090000000000074
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 5
  episodes_total: 1205
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.732178349082453e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2444511150320371
          entropy_coeff: 0.009999999999999998
          kl: 0.019489756068752424
          policy_loss: -0.01065322756767273
          total_loss: 1.819491986433665
          vf_explained_var: 0.12009003758430481
          vf_loss: 1.8325882275899252
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,419,10795.6,419000,-24.09,-18.7,-29.5,240.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-21_22-46-02
  done: false
  episode_len_mean: 239.57
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.957000000000065
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 5
  episodes_total: 1210
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.732178349082453e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3604560994439655
          entropy_coeff: 0.009999999999999998
          kl: 0.01285303973271681
          policy_loss: -0.009476071347792943
          total_loss: 1.7553863790300157
          vf_explained_var: 0.14602233469486237
          vf_loss: 1.7684660315513612
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,420,10831.8,420000,-23.957,-18.7,-29.5,239.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-21_22-46-38
  done: false
  episode_len_mean: 238.19
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.819000000000063
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 4
  episodes_total: 1214
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.732178349082453e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.31014721079005136
          entropy_coeff: 0.009999999999999998
          kl: 0.0028698567046092196
          policy_loss: 0.02152099675602383
          total_loss: 1.321763277053833
          vf_explained_var: 0.19503042101860046
          vf_loss: 1.3033435371186999
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,421,10867.8,421000,-23.819,-18.7,-29.5,238.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-21_22-47-12
  done: false
  episode_len_mean: 236.91
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.691000000000066
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 5
  episodes_total: 1219
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8660891745412266e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2866564301980866
          entropy_coeff: 0.009999999999999998
          kl: 0.0032470324014181214
          policy_loss: -0.07179962409039338
          total_loss: 1.4257779240608215
          vf_explained_var: 0.2506132423877716
          vf_loss: 1.500443987051646
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,422,10902.1,422000,-23.691,-18.7,-29.5,236.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-21_22-47-49
  done: false
  episode_len_mean: 235.44
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.54400000000006
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 4
  episodes_total: 1223
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9330445872706133e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.22125754356384278
          entropy_coeff: 0.009999999999999998
          kl: 0.0023165025972966347
          policy_loss: 0.03580251617564095
          total_loss: 1.2004928198125628
          vf_explained_var: 0.1839982569217682
          vf_loss: 1.166902826892005
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,423,10938.6,423000,-23.544,-18.7,-29.5,235.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-21_22-48-23
  done: false
  episode_len_mean: 233.07
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.30700000000006
  episode_reward_min: -28.400000000000134
  episodes_this_iter: 5
  episodes_total: 1228
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.665222936353067e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.27546959767738977
          entropy_coeff: 0.009999999999999998
          kl: 0.004708290819846578
          policy_loss: -0.00033258568081590863
          total_loss: 1.420206946796841
          vf_explained_var: 0.3971748352050781
          vf_loss: 1.423294194539388
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,424,10973.1,424000,-23.307,-18.7,-28.4,233.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-21_22-49-15
  done: false
  episode_len_mean: 231.33
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.133000000000056
  episode_reward_min: -28.400000000000134
  episodes_this_iter: 4
  episodes_total: 1232
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.832611468176533e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5747244377930959
          entropy_coeff: 0.009999999999999998
          kl: 0.0260211636938167
          policy_loss: 0.01360129788517952
          total_loss: 1.2692053463723925
          vf_explained_var: 0.46856722235679626
          vf_loss: 1.2613511588838366
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,425,11024.4,425000,-23.133,-18.7,-28.4,231.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-21_22-49-47
  done: false
  episode_len_mean: 230.94
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.094000000000054
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1236
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.248917202264803e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0109502640035417
          entropy_coeff: 0.009999999999999998
          kl: 0.029161200474282663
          policy_loss: 0.029713967359728283
          total_loss: 1.2070531381501093
          vf_explained_var: 0.2877449691295624
          vf_loss: 1.1874484678109487
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,426,11056.6,426000,-23.094,-18.7,-29.4,230.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-21_22-50-21
  done: false
  episode_len_mean: 230.54
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.054000000000055
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1240
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0873375803397205e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6543237451050017
          entropy_coeff: 0.009999999999999998
          kl: 0.02556884696399035
          policy_loss: -0.14989073127508162
          total_loss: 1.0090106500519647
          vf_explained_var: 0.4049330949783325
          vf_loss: 1.1654443502426148
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,427,11090.7,427000,-23.054,-18.7,-29.4,230.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-21_22-50-57
  done: false
  episode_len_mean: 229.65
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.965000000000053
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 5
  episodes_total: 1245
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6310063705095796e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4717043528954188
          entropy_coeff: 0.009999999999999998
          kl: 0.04873689998925307
          policy_loss: -0.012720946056975258
          total_loss: 0.9103412744071748
          vf_explained_var: 0.48274239897727966
          vf_loss: 0.927778465880288
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,428,11126.8,428000,-22.965,-18.7,-29.4,229.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-21_22-51-34
  done: false
  episode_len_mean: 228.11
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.81100000000005
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1249
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4465095557643707e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.30810580733749604
          entropy_coeff: 0.009999999999999998
          kl: 0.010024652298160991
          policy_loss: -0.04121177440716161
          total_loss: 1.3515144440862867
          vf_explained_var: 0.2548505961894989
          vf_loss: 1.395807022518582
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,429,11163.9,429000,-22.811,-18.7,-29.4,228.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-21_22-52-11
  done: false
  episode_len_mean: 225.88
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.588000000000047
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 5
  episodes_total: 1254
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4465095557643707e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.22963867684205372
          entropy_coeff: 0.009999999999999998
          kl: 0.007915484749411127
          policy_loss: 0.008187681798719697
          total_loss: 1.1492062475946214
          vf_explained_var: 0.513183057308197
          vf_loss: 1.1433147741688623
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,430,11200.2,430000,-22.588,-18.7,-29.4,225.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-21_22-52-44
  done: false
  episode_len_mean: 225.19
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.519000000000055
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1258
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4465095557643707e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.522627386616336
          entropy_coeff: 0.009999999999999998
          kl: 0.01563184291571894
          policy_loss: -0.027751983288261624
          total_loss: 0.9744389375050863
          vf_explained_var: 0.41744279861450195
          vf_loss: 1.0074168258243137
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,431,11233.8,431000,-22.519,-18.7,-29.4,225.19




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-21_22-53-30
  done: false
  episode_len_mean: 225.39
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.539000000000048
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1262
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4465095557643707e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7562128782272339
          entropy_coeff: 0.009999999999999998
          kl: 0.021979151771994214
          policy_loss: -0.017647869222693974
          total_loss: 0.8930090030034383
          vf_explained_var: 0.41242480278015137
          vf_loss: 0.918218469619751
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,432,11279.7,432000,-22.539,-18.7,-29.4,225.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-21_22-54-02
  done: false
  episode_len_mean: 225.64
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.564000000000046
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1266
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6697643336465566e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6596708771255281
          entropy_coeff: 0.009999999999999998
          kl: 0.05526222043176133
          policy_loss: 0.022047977811760374
          total_loss: 0.8479105247391595
          vf_explained_var: 0.40715962648391724
          vf_loss: 0.8324572318130069
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,433,11311.3,433000,-22.564,-18.7,-29.4,225.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-21_22-54-37
  done: false
  episode_len_mean: 226.12
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.612000000000048
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 5
  episodes_total: 1271
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5046465004698336e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.48506328927146064
          entropy_coeff: 0.009999999999999998
          kl: 0.07097399410363434
          policy_loss: 0.00565542783588171
          total_loss: 1.288962288035287
          vf_explained_var: 0.3180040717124939
          vf_loss: 1.2881535821490817
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,434,11346.8,434000,-22.612,-19.4,-29.4,226.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-21_22-55-10
  done: false
  episode_len_mean: 226.85
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.68500000000005
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1275
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.256969750704747e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5743073278003269
          entropy_coeff: 0.009999999999999998
          kl: 0.016206309529394343
          policy_loss: 0.010043446802430682
          total_loss: 0.8883382538954417
          vf_explained_var: 0.20926253497600555
          vf_loss: 0.8840365492635303
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,435,11379.6,435000,-22.685,-19.4,-29.4,226.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-21_22-55-45
  done: false
  episode_len_mean: 227.82
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.782000000000053
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 1279
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.256969750704747e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5202819661961662
          entropy_coeff: 0.009999999999999998
          kl: 0.008256980118283429
          policy_loss: 0.07122422092490727
          total_loss: 0.7909221516715156
          vf_explained_var: 0.31762513518333435
          vf_loss: 0.7249000602298312
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,436,11414.4,436000,-22.782,-19.4,-29.4,227.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-21_22-56-13
  done: false
  episode_len_mean: 229.24
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.92400000000006
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 3
  episodes_total: 1282
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.256969750704747e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9831329200002882
          entropy_coeff: 0.009999999999999998
          kl: 0.03361187452077385
          policy_loss: -0.09988757487800386
          total_loss: 0.7154425220357047
          vf_explained_var: 0.48759740591049194
          vf_loss: 0.8251586625973384
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,437,11442,437000,-22.924,-19.4,-29.4,229.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-21_22-56-37
  done: false
  episode_len_mean: 232.41
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.241000000000057
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 1285
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001238545462605712
          cur_lr: 5.000000000000001e-05
          entropy: 0.9510213951269786
          entropy_coeff: 0.009999999999999998
          kl: 0.02726625796264632
          policy_loss: -0.08042731864584816
          total_loss: 0.4562601754234897
          vf_explained_var: 0.7571475505828857
          vf_loss: 0.5461943232350879
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,438,11466,438000,-23.241,-19.4,-37.1,232.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-21_22-57-05
  done: false
  episode_len_mean: 235.45
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.54500000000006
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1289
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.7331921418507894
          entropy_coeff: 0.009999999999999998
          kl: 0.009453476083121299
          policy_loss: 0.07973111006948683
          total_loss: 0.714284661743376
          vf_explained_var: 0.7599667906761169
          vf_loss: 0.6418837126758363
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,439,11494.3,439000,-23.545,-19.4,-37.1,235.45




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-21_22-57-50
  done: false
  episode_len_mean: 236.84
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.68400000000006
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 1292
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.78549455470509
          entropy_coeff: 0.009999999999999998
          kl: 0.008511579884358708
          policy_loss: -0.11307170713941256
          total_loss: 0.3469790475236045
          vf_explained_var: 0.8552089929580688
          vf_loss: 0.4679041216770808
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,440,11539.6,440000,-23.684,-19.4,-37.1,236.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-21_22-58-20
  done: false
  episode_len_mean: 238.98
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.898000000000067
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1296
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.6842359370655484
          entropy_coeff: 0.009999999999999998
          kl: 0.008191840847282824
          policy_loss: 0.01598649165696568
          total_loss: 0.7378202186690437
          vf_explained_var: 0.75191330909729
          vf_loss: 0.7286745654212103
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,441,11569.1,441000,-23.898,-19.4,-37.1,238.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-21_22-58-47
  done: false
  episode_len_mean: 240.95
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.095000000000077
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1300
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.6965416040685442
          entropy_coeff: 0.009999999999999998
          kl: 0.014079916146661967
          policy_loss: 0.005456597275204129
          total_loss: 0.7101321551534865
          vf_explained_var: 0.7089216113090515
          vf_loss: 0.7116383595599068
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,442,11596.1,442000,-24.095,-21.5,-37.1,240.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-21_22-59-19
  done: false
  episode_len_mean: 242.53
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.253000000000075
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1304
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.6041545728842418
          entropy_coeff: 0.009999999999999998
          kl: 0.017993900376590155
          policy_loss: -0.022317867560519112
          total_loss: 0.63809375166893
          vf_explained_var: 0.7028944492340088
          vf_loss: 0.6664498289426167
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,443,11628.4,443000,-24.253,-21.5,-37.1,242.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-21_22-59-50
  done: false
  episode_len_mean: 244.43
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.44300000000008
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1308
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.5803771230909559
          entropy_coeff: 0.009999999999999998
          kl: 0.01918791558558088
          policy_loss: -0.006231078008810679
          total_loss: 0.6899778031640582
          vf_explained_var: 0.6484553813934326
          vf_loss: 0.7020090921057596
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,444,11659.4,444000,-24.443,-21.5,-37.1,244.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-21_23-00-22
  done: false
  episode_len_mean: 245.64
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.56400000000008
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 1311
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.48608165350225235
          entropy_coeff: 0.009999999999999998
          kl: 0.006840846004413158
          policy_loss: -0.0857518961860074
          total_loss: 0.7772788557741377
          vf_explained_var: 0.48794665932655334
          vf_loss: 0.8678903069761065
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,445,11690.9,445000,-24.564,-21.5,-37.1,245.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-21_23-00-52
  done: false
  episode_len_mean: 247.34
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.734000000000083
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1315
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.6108113732602861
          entropy_coeff: 0.009999999999999998
          kl: 0.017170025129554428
          policy_loss: 0.02221101207865609
          total_loss: 1.066414687368605
          vf_explained_var: 0.38904669880867004
          vf_loss: 1.050308605035146
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,446,11721.7,446000,-24.734,-21.5,-37.1,247.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-21_23-01-23
  done: false
  episode_len_mean: 248.5
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.850000000000087
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1319
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.5290077209472657
          entropy_coeff: 0.009999999999999998
          kl: 0.009121955325997879
          policy_loss: 0.008777041484912236
          total_loss: 0.9328786426120335
          vf_explained_var: 0.4555966556072235
          vf_loss: 0.9293899827533298
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,447,11752.5,447000,-24.85,-21.5,-37.1,248.5




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-21_23-02-13
  done: false
  episode_len_mean: 249.85
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.98500000000008
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1323
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.5792594088448418
          entropy_coeff: 0.009999999999999998
          kl: 0.011697922246960089
          policy_loss: 0.001253664493560791
          total_loss: 0.8864630798498789
          vf_explained_var: 0.5483872890472412
          vf_loss: 0.8909998324182299
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,448,11802.2,448000,-24.985,-21.5,-37.1,249.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-21_23-02-43
  done: false
  episode_len_mean: 251.66
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.166000000000086
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1327
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.5601357426908281
          entropy_coeff: 0.009999999999999998
          kl: 0.013583487282424282
          policy_loss: 0.0339374906073014
          total_loss: 0.8283841874864366
          vf_explained_var: 0.581687867641449
          vf_loss: 0.8000455286767748
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,449,11832.1,449000,-25.166,-21.5,-37.1,251.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-21_23-03-13
  done: false
  episode_len_mean: 253.29
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.329000000000093
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1331
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.4924821595350901
          entropy_coeff: 0.009999999999999998
          kl: 0.005449836555141808
          policy_loss: 0.03542209126883083
          total_loss: 0.9808344893985325
          vf_explained_var: 0.5082833766937256
          vf_loss: 0.950336201985677
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,450,11862.3,450000,-25.329,-21.5,-37.1,253.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-21_23-03-38
  done: false
  episode_len_mean: 254.88
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.488000000000092
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 1334
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.566438247760137
          entropy_coeff: 0.009999999999999998
          kl: 0.0034969805688803823
          policy_loss: -0.03412684193915791
          total_loss: 0.9536569873491924
          vf_explained_var: 0.40301451086997986
          vf_loss: 0.9934475660324097
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,451,11887.5,451000,-25.488,-21.5,-37.1,254.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-21_23-04-06
  done: false
  episode_len_mean: 256.56
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.656000000000095
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1338
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.289090969542844e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5855570654074351
          entropy_coeff: 0.009999999999999998
          kl: 0.03373970606324978
          policy_loss: 0.021941977449589305
          total_loss: 1.1642186416520013
          vf_explained_var: 0.41954371333122253
          vf_loss: 1.1481290843751695
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,452,11914.7,452000,-25.656,-21.5,-37.1,256.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-21_23-04-39
  done: false
  episode_len_mean: 256.75
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.6750000000001
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1342
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001393363645431426
          cur_lr: 5.000000000000001e-05
          entropy: 0.4861328787273831
          entropy_coeff: 0.009999999999999998
          kl: 0.0078601669927407
          policy_loss: 0.04131952565577295
          total_loss: 1.1240732265843285
          vf_explained_var: 0.3343965709209442
          vf_loss: 1.08761392765575
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,453,11948,453000,-25.675,-21.5,-37.1,256.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-21_23-05-12
  done: false
  episode_len_mean: 257.46
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.746000000000098
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1346
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001393363645431426
          cur_lr: 5.000000000000001e-05
          entropy: 0.6236009816328685
          entropy_coeff: 0.009999999999999998
          kl: 0.05651855557206223
          policy_loss: 0.03489101255933444
          total_loss: 0.985687729385164
          vf_explained_var: 0.2736316919326782
          vf_loss: 0.9570248544216156
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,454,11980.8,454000,-25.746,-21.5,-37.1,257.46




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-21_23-05-56
  done: false
  episode_len_mean: 259.84
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -25.9840000000001
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1350
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00020900454681471394
          cur_lr: 5.000000000000001e-05
          entropy: 0.8166952868302663
          entropy_coeff: 0.009999999999999998
          kl: 0.012716604770076698
          policy_loss: 0.018706398819469745
          total_loss: 1.1675149427519904
          vf_explained_var: 0.37284502387046814
          vf_loss: 1.15697283744812
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,455,12025.1,455000,-25.984,-21.5,-37.1,259.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-21_23-06-26
  done: false
  episode_len_mean: 262.05
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.2050000000001
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 3
  episodes_total: 1353
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00020900454681471394
          cur_lr: 5.000000000000001e-05
          entropy: 0.9039352397123973
          entropy_coeff: 0.009999999999999998
          kl: 0.027696267745698736
          policy_loss: 0.04779569059610367
          total_loss: 1.035485084189309
          vf_explained_var: 0.3137533962726593
          vf_loss: 0.9967229495445887
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,456,12054.6,456000,-26.205,-21.5,-37.1,262.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-21_23-06-58
  done: false
  episode_len_mean: 263.23
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.323000000000107
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1357
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003135068202220709
          cur_lr: 5.000000000000001e-05
          entropy: 0.8132425328095754
          entropy_coeff: 0.009999999999999998
          kl: 0.009338875819909494
          policy_loss: 0.05481521503792869
          total_loss: 1.0985457572672102
          vf_explained_var: 0.23290123045444489
          vf_loss: 1.0518600321478313
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,457,12086.8,457000,-26.323,-21.5,-37.1,263.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-21_23-07-28
  done: false
  episode_len_mean: 263.78
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.378000000000107
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1361
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003135068202220709
          cur_lr: 5.000000000000001e-05
          entropy: 0.9638897538185119
          entropy_coeff: 0.009999999999999998
          kl: 0.06886726740125976
          policy_loss: 0.03973707221448421
          total_loss: 1.3101468609439002
          vf_explained_var: 0.2750771641731262
          vf_loss: 1.2800271140204535
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,458,12117.1,458000,-26.378,-21.5,-37.1,263.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-21_23-08-03
  done: false
  episode_len_mean: 262.93
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.293000000000106
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1365
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00047026023033310633
          cur_lr: 5.000000000000001e-05
          entropy: 0.4918967048327128
          entropy_coeff: 0.009999999999999998
          kl: 0.005967433790793544
          policy_loss: 0.0009087312552664015
          total_loss: 1.2322637220223744
          vf_explained_var: 0.1912657618522644
          vf_loss: 1.2362711760732863
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,459,12151.6,459000,-26.293,-21.5,-37.1,262.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-21_23-08-32
  done: false
  episode_len_mean: 263.43
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.343000000000107
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1369
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00047026023033310633
          cur_lr: 5.000000000000001e-05
          entropy: 0.5655756586127811
          entropy_coeff: 0.009999999999999998
          kl: 0.008212047120608348
          policy_loss: 0.03184884496861034
          total_loss: 1.0792191869682737
          vf_explained_var: 0.33738601207733154
          vf_loss: 1.0530222323205736
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,460,12181.3,460000,-26.343,-21.5,-37.1,263.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-21_23-09-07
  done: false
  episode_len_mean: 263.15
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.315000000000104
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 5
  episodes_total: 1374
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00047026023033310633
          cur_lr: 5.000000000000001e-05
          entropy: 0.4634452369478014
          entropy_coeff: 0.009999999999999998
          kl: 0.01986793642903256
          policy_loss: -0.01867515254351828
          total_loss: 1.5997463001145258
          vf_explained_var: 0.10728544741868973
          vf_loss: 1.6230465557840135
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,461,12216,461000,-26.315,-21.6,-37.1,263.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-21_23-09-43
  done: false
  episode_len_mean: 261.56
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.1560000000001
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 1378
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00047026023033310633
          cur_lr: 5.000000000000001e-05
          entropy: 0.1831411803762118
          entropy_coeff: 0.009999999999999998
          kl: 0.0023325780422785176
          policy_loss: 0.03585114719139205
          total_loss: 1.2253148900138007
          vf_explained_var: 0.07041517645120621
          vf_loss: 1.1912940435939365
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,462,12252.1,462000,-26.156,-21.6,-37.1,261.56




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-21_23-10-37
  done: false
  episode_len_mean: 258.42
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.842000000000095
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 5
  episodes_total: 1383
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023513011516655317
          cur_lr: 5.000000000000001e-05
          entropy: 0.13979683650864494
          entropy_coeff: 0.009999999999999998
          kl: 0.001929358556904089
          policy_loss: -0.012898370954725477
          total_loss: 1.5666416552331712
          vf_explained_var: 0.09525249153375626
          vf_loss: 1.580937545829349
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,463,12305.4,463000,-25.842,-18.8,-37.1,258.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-21_23-11-12
  done: false
  episode_len_mean: 253.78
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.37800000000009
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1388
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011756505758327658
          cur_lr: 5.000000000000001e-05
          entropy: 0.1622754024962584
          entropy_coeff: 0.009999999999999998
          kl: 0.0018124044571041825
          policy_loss: -0.01609932602279716
          total_loss: 1.6377029803064134
          vf_explained_var: 0.09336812794208527
          vf_loss: 1.6554248491923014
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,464,12341.2,464000,-25.378,-18.8,-32.3,253.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-21_23-11-48
  done: false
  episode_len_mean: 251.22
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.12200000000008
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1392
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.878252879163829e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.22158686253759596
          entropy_coeff: 0.009999999999999998
          kl: 0.003833935711486763
          policy_loss: 0.036966768900553386
          total_loss: 1.255851055516137
          vf_explained_var: 0.13480624556541443
          vf_loss: 1.221099943584866
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,465,12377.3,465000,-25.122,-18.8,-32.3,251.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-21_23-12-25
  done: false
  episode_len_mean: 249.54
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.954000000000082
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1396
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9391264395819146e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5505952710906664
          entropy_coeff: 0.009999999999999998
          kl: 0.028990633279753472
          policy_loss: -0.00676309722993109
          total_loss: 1.2017111387517718
          vf_explained_var: 0.23421482741832733
          vf_loss: 1.2139793356259665
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,466,12414.2,466000,-24.954,-18.8,-32.3,249.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-21_23-13-03
  done: false
  episode_len_mean: 247.14
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.714000000000077
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1401
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4086896593728715e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2776450999908977
          entropy_coeff: 0.009999999999999998
          kl: 0.007622090979441825
          policy_loss: 0.012427333659595913
          total_loss: 1.7929569827185736
          vf_explained_var: 0.0841117575764656
          vf_loss: 1.7833057469791835
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,467,12451.9,467000,-24.714,-18.8,-32.3,247.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-21_23-13-36
  done: false
  episode_len_mean: 246.41
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.641000000000076
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1405
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4086896593728715e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.48880570696459874
          entropy_coeff: 0.009999999999999998
          kl: 0.0045381547749959355
          policy_loss: 0.001131980452272627
          total_loss: 1.2325764735539755
          vf_explained_var: 0.2295425981283188
          vf_loss: 1.2363323469956715
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,468,12484.9,468000,-24.641,-18.8,-32.3,246.41




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-21_23-14-29
  done: false
  episode_len_mean: 244.24
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.424000000000078
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1410
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2043448296864358e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.33612418696284296
          entropy_coeff: 0.009999999999999998
          kl: 0.010905875536887934
          policy_loss: 0.012768950644466611
          total_loss: 1.31587212714884
          vf_explained_var: 0.485687255859375
          vf_loss: 1.3064641843239466
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,469,12537.2,469000,-24.424,-18.8,-32.3,244.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-21_23-15-05
  done: false
  episode_len_mean: 243.43
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.343000000000075
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1414
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2043448296864358e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5215439579553074
          entropy_coeff: 0.009999999999999998
          kl: 0.03494067103667528
          policy_loss: 0.056240399844116634
          total_loss: 1.0663227412435743
          vf_explained_var: 0.34891578555107117
          vf_loss: 1.0152970174948375
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,470,12573.3,470000,-24.343,-18.8,-32.3,243.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-21_23-15-41
  done: false
  episode_len_mean: 241.49
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.14900000000007
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1419
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.306517244529652e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.13493672120902273
          entropy_coeff: 0.009999999999999998
          kl: 0.010595330275318915
          policy_loss: -0.01589283926619424
          total_loss: 1.7175763368606567
          vf_explained_var: 0.16661344468593597
          vf_loss: 1.7348181823889415
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,471,12610,471000,-24.149,-18.8,-32.3,241.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-21_23-16-19
  done: false
  episode_len_mean: 240.19
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.01900000000006
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1423
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.306517244529652e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.13246598384446567
          entropy_coeff: 0.009999999999999998
          kl: 0.0021945344917215534
          policy_loss: 0.0397191738916768
          total_loss: 1.3348420805401273
          vf_explained_var: 0.08432511240243912
          vf_loss: 1.2964474989308252
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,472,12647.8,472000,-24.019,-18.8,-32.3,240.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-21_23-16-56
  done: false
  episode_len_mean: 237.96
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.796000000000067
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1428
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.653258622264826e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.13512747370534473
          entropy_coeff: 0.009999999999999998
          kl: 0.004279843507609371
          policy_loss: -0.007172702832354439
          total_loss: 1.8223639501465692
          vf_explained_var: 0.10589303821325302
          vf_loss: 1.8308878435028924
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,473,12684.5,473000,-23.796,-18.8,-32.3,237.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-21_23-17-33
  done: false
  episode_len_mean: 235.89
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.589000000000066
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1432
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.26629311132413e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.10619660077823533
          entropy_coeff: 0.009999999999999998
          kl: 0.0006763732642506268
          policy_loss: -0.027534933388233186
          total_loss: 1.3019393960634866
          vf_explained_var: 0.1257418841123581
          vf_loss: 1.330536296632555
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,474,12721.9,474000,-23.589,-18.8,-32.3,235.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-21_23-18-10
  done: false
  episode_len_mean: 232.25
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.225000000000055
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1437
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.133146555662065e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.1264056252936522
          entropy_coeff: 0.009999999999999998
          kl: 0.0004833073740262724
          policy_loss: 0.02043590752614869
          total_loss: 1.468353059556749
          vf_explained_var: 0.1151915118098259
          vf_loss: 1.4491812335120307
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,475,12758.8,475000,-23.225,-18.8,-32.3,232.25




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-21_23-19-06
  done: false
  episode_len_mean: 230.02
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.002000000000052
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1442
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0665732778310327e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.12058134219712681
          entropy_coeff: 0.009999999999999998
          kl: 0.0006686650394877505
          policy_loss: -0.013681573255194559
          total_loss: 1.702533451716105
          vf_explained_var: 0.08939369767904282
          vf_loss: 1.7174208283424377
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 47600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,476,12814.1,476000,-23.002,-18.8,-32.3,230.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-21_23-19-43
  done: false
  episode_len_mean: 229.06
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.906000000000052
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 1446
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0332866389155163e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.10248497869405482
          entropy_coeff: 0.009999999999999998
          kl: 0.0018516953884109258
          policy_loss: -0.012002310984664493
          total_loss: 1.3719940251774259
          vf_explained_var: 0.09246768802404404
          vf_loss: 1.3850211713049148
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 4770

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,477,12851.4,477000,-22.906,-18.8,-32.3,229.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-21_23-20-20
  done: false
  episode_len_mean: 226.06
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.606000000000048
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 5
  episodes_total: 1451
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.166433194577582e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.12220153030421999
          entropy_coeff: 0.009999999999999998
          kl: 0.0018754447709841237
          policy_loss: 0.002864122473531299
          total_loss: 1.8236277474297418
          vf_explained_var: 0.12368717044591904
          vf_loss: 1.8219856235716079
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,478,12888.6,478000,-22.606,-18.8,-32.3,226.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-21_23-20-57
  done: false
  episode_len_mean: 223.6
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.360000000000046
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 5
  episodes_total: 1456
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.583216597288791e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.1365699494878451
          entropy_coeff: 0.009999999999999998
          kl: 0.010006119112768866
          policy_loss: 0.008409321308135986
          total_loss: 1.6503231942653656
          vf_explained_var: 0.3673584461212158
          vf_loss: 1.6432795763015746
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,479,12925.5,479000,-22.36,-18.8,-28.3,223.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-21_23-21-35
  done: false
  episode_len_mean: 221.74
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.17400000000004
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 4
  episodes_total: 1460
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.583216597288791e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.1107520482606358
          entropy_coeff: 0.009999999999999998
          kl: 0.0008603169136860384
          policy_loss: -0.024041434791353015
          total_loss: 1.0998970303270552
          vf_explained_var: 0.2660493552684784
          vf_loss: 1.1250459889570872
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,480,12963,480000,-22.174,-18.8,-28.3,221.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-21_23-22-12
  done: false
  episode_len_mean: 220.51
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.051000000000034
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 5
  episodes_total: 1465
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2916082986443954e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.10631170579128796
          entropy_coeff: 0.009999999999999998
          kl: 0.0013922709553308296
          policy_loss: -0.07407379572590192
          total_loss: 1.3448577066262564
          vf_explained_var: 0.39302146434783936
          vf_loss: 1.4199946098857457
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 4810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,481,12999.9,481000,-22.051,-18.8,-28.3,220.51




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-21_23-23-06
  done: false
  episode_len_mean: 218.71
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.871000000000034
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 5
  episodes_total: 1470
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.458041493221977e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.14861457720398902
          entropy_coeff: 0.009999999999999998
          kl: 0.004307615147343963
          policy_loss: 0.005012283474206925
          total_loss: 0.8866295648945702
          vf_explained_var: 0.5758628249168396
          vf_loss: 0.8831034335825179
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,482,13054.2,482000,-21.871,-18.8,-28.3,218.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-21_23-23-43
  done: false
  episode_len_mean: 218.71
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.87100000000004
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 4
  episodes_total: 1474
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2290207466109886e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.2058803410993682
          entropy_coeff: 0.009999999999999998
          kl: 0.001211224005911207
          policy_loss: 0.03025127450625102
          total_loss: 0.5188761310444938
          vf_explained_var: 0.7709019184112549
          vf_loss: 0.4906836579243342
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,483,13091.3,483000,-21.871,-18.8,-28.3,218.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-21_23-24-17
  done: false
  episode_len_mean: 218.81
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.88100000000004
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 5
  episodes_total: 1479
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6145103733054943e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.2117581715186437
          entropy_coeff: 0.009999999999999998
          kl: 0.001347950845194765
          policy_loss: -0.0006896071963840061
          total_loss: 0.6416644566588932
          vf_explained_var: 0.7438759803771973
          vf_loss: 0.6444716460174984
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,484,13125.7,484000,-21.881,-18.8,-28.3,218.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-21_23-24-54
  done: false
  episode_len_mean: 219.26
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -21.92600000000004
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 4
  episodes_total: 1483
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.072551866527471e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.24410681145058738
          entropy_coeff: 0.009999999999999998
          kl: 0.001245907532964768
          policy_loss: -0.010265080879131952
          total_loss: 0.48672822217146555
          vf_explained_var: 0.723287045955658
          vf_loss: 0.4994343681467904
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,485,13162.5,485000,-21.926,-18.9,-28.3,219.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-21_23-25-30
  done: false
  episode_len_mean: 219.52
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -21.95200000000004
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 5
  episodes_total: 1488
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.036275933263736e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3118959072563383
          entropy_coeff: 0.009999999999999998
          kl: 0.0018399442611766008
          policy_loss: -0.009428339865472582
          total_loss: 0.7335627032650842
          vf_explained_var: 0.6136440634727478
          vf_loss: 0.7461099949147966
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,486,13198.5,486000,-21.952,-18.9,-28.3,219.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-21_23-26-05
  done: false
  episode_len_mean: 220.09
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.009000000000043
  episode_reward_min: -28.300000000000132
  episodes_this_iter: 4
  episodes_total: 1492
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.018137966631868e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3671679801411099
          entropy_coeff: 0.009999999999999998
          kl: 0.0021030021374842693
          policy_loss: 0.04801501979430516
          total_loss: 0.7539320117897458
          vf_explained_var: 0.4799993336200714
          vf_loss: 0.7095886687437694
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,487,13233.7,487000,-22.009,-18.9,-28.3,220.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-21_23-26-40
  done: false
  episode_len_mean: 219.93
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -21.99300000000004
  episode_reward_min: -27.000000000000114
  episodes_this_iter: 4
  episodes_total: 1496
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.009068983315934e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5012345598803626
          entropy_coeff: 0.009999999999999998
          kl: 0.0025207445329665274
          policy_loss: -0.008182461435596148
          total_loss: 0.7500719010829926
          vf_explained_var: 0.4615827202796936
          vf_loss: 0.763266713751687
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,488,13268.2,488000,-21.993,-18.9,-27,219.93




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-21_23-27-32
  done: false
  episode_len_mean: 220.75
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.07500000000005
  episode_reward_min: -27.000000000000114
  episodes_this_iter: 5
  episodes_total: 1501
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5034429056776895
          entropy_coeff: 0.009999999999999998
          kl: 0.007526395185497216
          policy_loss: -0.04348959376414617
          total_loss: 0.9617112669679854
          vf_explained_var: 0.4295051693916321
          vf_loss: 1.010235286421246
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,489,13320.7,489000,-22.075,-18.9,-27,220.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-21_23-28-07
  done: false
  episode_len_mean: 220.74
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.074000000000048
  episode_reward_min: -26.80000000000011
  episodes_this_iter: 4
  episodes_total: 1505
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6203412122196621
          entropy_coeff: 0.009999999999999998
          kl: 0.013352256246099339
          policy_loss: 0.03299719939629237
          total_loss: 0.7553746077749465
          vf_explained_var: 0.573161244392395
          vf_loss: 0.7285808232095506
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,490,13355.5,490000,-22.074,-18.9,-26.8,220.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-21_23-28-39
  done: false
  episode_len_mean: 221.85
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.185000000000045
  episode_reward_min: -26.80000000000011
  episodes_this_iter: 4
  episodes_total: 1509
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8924529506100549
          entropy_coeff: 0.009999999999999998
          kl: 0.01580236062438571
          policy_loss: 0.050501987172497646
          total_loss: 0.9293842958079443
          vf_explained_var: 0.2635364234447479
          vf_loss: 0.8878068182203505
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,491,13387.6,491000,-22.185,-18.9,-26.8,221.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-21_23-29-08
  done: false
  episode_len_mean: 223.17
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.317000000000043
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 1513
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0655355731646219
          entropy_coeff: 0.009999999999999998
          kl: 0.005457435398196095
          policy_loss: -0.013078900054097176
          total_loss: 0.9680670274628533
          vf_explained_var: 0.2078240066766739
          vf_loss: 0.9918012963400946
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,492,13416.4,492000,-22.317,-18.9,-27.2,223.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-21_23-29-38
  done: false
  episode_len_mean: 225.2
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.52000000000005
  episode_reward_min: -29.50000000000015
  episodes_this_iter: 3
  episodes_total: 1516
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1722422003746034
          entropy_coeff: 0.009999999999999998
          kl: 0.008214151670176683
          policy_loss: 0.04830473976002799
          total_loss: 0.8098144557740953
          vf_explained_var: 0.1472797989845276
          vf_loss: 0.7732321384880277
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,493,13446.2,493000,-22.52,-18.9,-29.5,225.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-21_23-30-07
  done: false
  episode_len_mean: 227.93
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.793000000000053
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1520
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1194476816389296
          entropy_coeff: 0.009999999999999998
          kl: 0.006633875215394862
          policy_loss: -0.0027382996761136586
          total_loss: 1.1812786738077798
          vf_explained_var: 0.14791488647460938
          vf_loss: 1.1952114502588909
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,494,13475.3,494000,-22.793,-18.9,-31.4,227.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-21_23-30-39
  done: false
  episode_len_mean: 229.34
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.934000000000054
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 3
  episodes_total: 1523
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9030884709623125
          entropy_coeff: 0.009999999999999998
          kl: 0.013721965078294653
          policy_loss: -0.054302569644318684
          total_loss: 0.975501506196128
          vf_explained_var: 0.0947842076420784
          vf_loss: 1.0388349711894989
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,495,13506.8,495000,-22.934,-18.9,-31.4,229.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-21_23-31-10
  done: false
  episode_len_mean: 231.28
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.128000000000057
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1527
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.04534491657967e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7483172410064274
          entropy_coeff: 0.009999999999999998
          kl: 0.03339361326272628
          policy_loss: 0.0057378136449389985
          total_loss: 1.196400229136149
          vf_explained_var: 0.22942091524600983
          vf_loss: 1.1981456014845107
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,496,13538.2,496000,-23.128,-18.9,-31.4,231.28




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-21_23-32-02
  done: false
  episode_len_mean: 232.11
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.21100000000006
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1531
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.568017374869507e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5944484641154607
          entropy_coeff: 0.009999999999999998
          kl: 0.006359496730353461
          policy_loss: -0.05342793199751112
          total_loss: 1.1338329222467212
          vf_explained_var: 0.2601213753223419
          vf_loss: 1.1932053473260669
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,497,13590.1,497000,-23.211,-18.9,-31.4,232.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-21_23-32-35
  done: false
  episode_len_mean: 233.37
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.337000000000057
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1535
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.568017374869507e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6554086804389954
          entropy_coeff: 0.009999999999999998
          kl: 0.009482808924940085
          policy_loss: -0.07324756590856446
          total_loss: 1.3112427247895135
          vf_explained_var: 0.1501140594482422
          vf_loss: 1.3910443941752115
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,498,13623.4,498000,-23.337,-18.9,-31.4,233.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-21_23-33-09
  done: false
  episode_len_mean: 234.89
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.48900000000006
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 5
  episodes_total: 1540
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.568017374869507e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.530330123172866
          entropy_coeff: 0.009999999999999998
          kl: 0.028418823315081533
          policy_loss: -0.024024572140640684
          total_loss: 1.3998650828997294
          vf_explained_var: 0.24412010610103607
          vf_loss: 1.4291929682095845
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,499,13657.5,499000,-23.489,-19.1,-31.4,234.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-21_23-33-43
  done: false
  episode_len_mean: 235.83
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.583000000000066
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1544
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1352026062304257e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6041866017712487
          entropy_coeff: 0.009999999999999998
          kl: 0.0033948550473678236
          policy_loss: 0.028559064865112303
          total_loss: 1.1006769776344298
          vf_explained_var: 0.29734185338020325
          vf_loss: 1.0781597673892975
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,500,13691.3,500000,-23.583,-19.1,-31.4,235.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-21_23-34-17
  done: false
  episode_len_mean: 236.48
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.64800000000007
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1548
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.676013031152128e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5973173333538904
          entropy_coeff: 0.009999999999999998
          kl: 0.0069332640626422895
          policy_loss: 0.03570198838909467
          total_loss: 1.195967842472924
          vf_explained_var: 0.22932277619838715
          vf_loss: 1.1662390271822611
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,501,13724.5,501000,-23.648,-19.1,-31.4,236.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-21_23-34-52
  done: false
  episode_len_mean: 237.61
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.76100000000007
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1552
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.676013031152128e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.655515558189816
          entropy_coeff: 0.009999999999999998
          kl: 0.004141126529246877
          policy_loss: 0.03953173591030969
          total_loss: 1.1624413132667542
          vf_explained_var: 0.18741267919540405
          vf_loss: 1.1294647375742595
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,502,13759.9,502000,-23.761,-19.1,-31.4,237.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-21_23-35-27
  done: false
  episode_len_mean: 238.53
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.85300000000007
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1556
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.838006515576064e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5754947344462077
          entropy_coeff: 0.009999999999999998
          kl: 0.0033858147951102624
          policy_loss: -0.02309053743051158
          total_loss: 1.2425409065352546
          vf_explained_var: 0.15051837265491486
          vf_loss: 1.2713863889376322
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,503,13795.3,503000,-23.853,-19.1,-31.4,238.53




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-21_23-36-21
  done: false
  episode_len_mean: 239.26
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -23.926000000000073
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 5
  episodes_total: 1561
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.419003257788032e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5554676208231184
          entropy_coeff: 0.009999999999999998
          kl: 0.01256974355387478
          policy_loss: -0.035535843670368196
          total_loss: 1.458907663822174
          vf_explained_var: 0.22542491555213928
          vf_loss: 1.4999981721242268
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,504,13848.7,504000,-23.926,-19.1,-31.4,239.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-21_23-36-56
  done: false
  episode_len_mean: 240.13
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.01300000000007
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1565
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.419003257788032e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6381608022583856
          entropy_coeff: 0.009999999999999998
          kl: 0.00868143948369951
          policy_loss: 0.04736967302030987
          total_loss: 1.101489210128784
          vf_explained_var: 0.1758469045162201
          vf_loss: 1.060501143667433
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,505,13884,505000,-24.013,-19.1,-31.4,240.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-21_23-37-29
  done: false
  episode_len_mean: 241.39
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.139000000000078
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1569
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.419003257788032e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5871198581324684
          entropy_coeff: 0.009999999999999998
          kl: 0.0337172057622775
          policy_loss: 0.03393683036168416
          total_loss: 1.0679631491502126
          vf_explained_var: 0.2220679670572281
          vf_loss: 1.0398974968327417
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,506,13917,506000,-24.139,-19.1,-31.4,241.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-21_23-38-03
  done: false
  episode_len_mean: 242.6
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.260000000000076
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1573
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1285048866820492e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5666756904787488
          entropy_coeff: 0.009999999999999998
          kl: 0.007722623574220538
          policy_loss: 0.06772955366306835
          total_loss: 0.9725759903589885
          vf_explained_var: 0.14536459743976593
          vf_loss: 0.9105131854613622
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,507,13950.7,507000,-24.26,-19.4,-31.4,242.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-21_23-38-39
  done: false
  episode_len_mean: 243.3
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.330000000000076
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1577
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1285048866820492e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6724844588173761
          entropy_coeff: 0.009999999999999998
          kl: 0.009136017588530956
          policy_loss: -0.0026537247829967075
          total_loss: 1.1626118103663126
          vf_explained_var: 0.2393919676542282
          vf_loss: 1.171990364127689
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,508,13986.3,508000,-24.33,-19.4,-31.4,243.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-21_23-39-12
  done: false
  episode_len_mean: 244.39
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.439000000000078
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1581
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1285048866820492e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6825617061720954
          entropy_coeff: 0.009999999999999998
          kl: 0.003166368246712755
          policy_loss: -0.004074560643898116
          total_loss: 1.1271355893876818
          vf_explained_var: 0.22069615125656128
          vf_loss: 1.1380357689327665
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 50900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,509,14019.8,509000,-24.439,-19.4,-31.4,244.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-21_23-39-42
  done: false
  episode_len_mean: 245.52
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.55200000000008
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1585
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0642524433410246e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7173361837863922
          entropy_coeff: 0.009999999999999998
          kl: 0.007045777792929423
          policy_loss: -0.014601142704486847
          total_loss: 1.128014290332794
          vf_explained_var: 0.18612807989120483
          vf_loss: 1.1497887863053216
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,510,14049.7,510000,-24.552,-19.4,-31.4,245.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-21_23-40-13
  done: false
  episode_len_mean: 246.4
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.640000000000082
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1589
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0642524433410246e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6415271076891157
          entropy_coeff: 0.009999999999999998
          kl: 0.01380931910360543
          policy_loss: -0.02330284110373921
          total_loss: 1.1506303588549296
          vf_explained_var: 0.1397716999053955
          vf_loss: 1.1803484797477721
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,511,14081,511000,-24.64,-19.4,-31.4,246.4




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-21_23-41-04
  done: false
  episode_len_mean: 246.85
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.68500000000008
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0642524433410246e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5213535772429572
          entropy_coeff: 0.009999999999999998
          kl: 0.02615117965453414
          policy_loss: -0.11002583553393681
          total_loss: 1.3891737620035807
          vf_explained_var: 0.2510114312171936
          vf_loss: 1.5044131146536932
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,512,14131.6,512000,-24.685,-19.4,-31.4,246.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-21_23-41-38
  done: false
  episode_len_mean: 246.4
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.640000000000082
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 5
  episodes_total: 1598
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5963786650115368e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6078653259409799
          entropy_coeff: 0.009999999999999998
          kl: 0.023156167493944856
          policy_loss: -0.001521896943449974
          total_loss: 1.0861581497722201
          vf_explained_var: 0.31373873353004456
          vf_loss: 1.0937586956553988
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,513,14165.4,513000,-24.64,-19.4,-31.4,246.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-21_23-42-11
  done: false
  episode_len_mean: 246.78
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -24.67800000000009
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1602
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.394567997517305e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6400951829221514
          entropy_coeff: 0.009999999999999998
          kl: 0.007462041704980551
          policy_loss: 0.03508627178768317
          total_loss: 1.047147661447525
          vf_explained_var: 0.24996712803840637
          vf_loss: 1.0184623453352186
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,514,14198.3,514000,-24.678,-20.8,-31.4,246.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-21_23-42-43
  done: false
  episode_len_mean: 247.42
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -24.74200000000008
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1606
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.394567997517305e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6819353699684143
          entropy_coeff: 0.009999999999999998
          kl: 0.0034664431748780288
          policy_loss: 0.025180195023616157
          total_loss: 1.109356537130144
          vf_explained_var: 0.20880958437919617
          vf_loss: 1.0909957011540732
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,515,14230.9,515000,-24.742,-20.8,-31.4,247.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-21_23-43-17
  done: false
  episode_len_mean: 246.59
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -24.659000000000084
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1610
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1972839987586525e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6080853091345892
          entropy_coeff: 0.009999999999999998
          kl: 0.0036324905834121068
          policy_loss: 0.02717429714070426
          total_loss: 1.2431447837087843
          vf_explained_var: 0.11498301476240158
          vf_loss: 1.2220513330565559
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,516,14264.3,516000,-24.659,-20.8,-31.4,246.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-21_23-43-51
  done: false
  episode_len_mean: 245.88
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -24.58800000000008
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1614
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.986419993793263e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7010266840457916
          entropy_coeff: 0.009999999999999998
          kl: 0.005904454268662028
          policy_loss: 0.04679861002498203
          total_loss: 1.0331880668799083
          vf_explained_var: 0.16150355339050293
          vf_loss: 0.9933997157547209
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,517,14298.2,517000,-24.588,-20.8,-31.4,245.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-21_23-44-23
  done: false
  episode_len_mean: 244.4
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -24.440000000000072
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 1618
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.986419993793263e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8145756900310517
          entropy_coeff: 0.009999999999999998
          kl: 0.0042951122163512895
          policy_loss: 0.039901311530007255
          total_loss: 1.160833086570104
          vf_explained_var: 0.11001019179821014
          vf_loss: 1.1290775252713097
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,518,14330.3,518000,-24.44,-20.8,-28.6,244.4




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-21_23-45-15
  done: false
  episode_len_mean: 243.25
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.325000000000077
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 1622
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8988105720943875
          entropy_coeff: 0.009999999999999998
          kl: 0.007174971969930905
          policy_loss: 0.007038765980137719
          total_loss: 1.3727704458766514
          vf_explained_var: 0.13458587229251862
          vf_loss: 1.3747197932667203
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,519,14382.9,519000,-24.325,-20.3,-28.6,243.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-21_23-45-49
  done: false
  episode_len_mean: 242.82
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.282000000000075
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 1626
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.714393236902025
          entropy_coeff: 0.009999999999999998
          kl: 0.006608243602723955
          policy_loss: 0.001697696124513944
          total_loss: 1.3765999992688498
          vf_explained_var: 0.13999967277050018
          vf_loss: 1.382046225335863
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,520,14416.3,520000,-24.282,-20.3,-28.6,242.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-21_23-46-20
  done: false
  episode_len_mean: 242.99
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.299000000000078
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1630
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7768033133612738
          entropy_coeff: 0.009999999999999998
          kl: 0.006275406051562824
          policy_loss: 0.00788582306769159
          total_loss: 1.342665934562683
          vf_explained_var: 0.1521047204732895
          vf_loss: 1.342548123995463
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,521,14447.7,521000,-24.299,-20.3,-26.9,242.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-21_23-46-49
  done: false
  episode_len_mean: 243.29
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.32900000000008
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1634
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7247005124886831
          entropy_coeff: 0.009999999999999998
          kl: 0.008386405561960132
          policy_loss: 0.04817970916628837
          total_loss: 1.1921590606371562
          vf_explained_var: 0.10786646604537964
          vf_loss: 1.1512263609303368
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,522,14476.9,522000,-24.329,-20.3,-26.9,243.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-21_23-47-17
  done: false
  episode_len_mean: 243.79
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.37900000000008
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1638
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7867457085185581
          entropy_coeff: 0.009999999999999998
          kl: 0.0073071702942828646
          policy_loss: 0.030119658592674466
          total_loss: 1.4198080433739557
          vf_explained_var: 0.11160262674093246
          vf_loss: 1.3975558479626973
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,523,14504.5,523000,-24.379,-20.3,-26.9,243.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-21_23-47-49
  done: false
  episode_len_mean: 244.29
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.429000000000084
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1642
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9932099968966314e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8222023732132382
          entropy_coeff: 0.009999999999999998
          kl: 0.004752385539481926
          policy_loss: 0.021163992252614762
          total_loss: 1.391663788424598
          vf_explained_var: 0.13820049166679382
          vf_loss: 1.378721797466278
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,524,14536,524000,-24.429,-20.3,-26.9,244.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-21_23-48-20
  done: false
  episode_len_mean: 245.09
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.509000000000082
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1646
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4966049984483157e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8981367700629764
          entropy_coeff: 0.009999999999999998
          kl: 0.0114602489421809
          policy_loss: 0.049393496165672936
          total_loss: 1.1628696295950147
          vf_explained_var: 0.11588598042726517
          vf_loss: 1.1224575022856393
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,525,14567,525000,-24.509,-20.3,-26.9,245.09




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-21_23-49-09
  done: false
  episode_len_mean: 245.34
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.534000000000074
  episode_reward_min: -26.900000000000112
  episodes_this_iter: 4
  episodes_total: 1650
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4966049984483157e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7856587237781949
          entropy_coeff: 0.009999999999999998
          kl: 0.01748731615785096
          policy_loss: 0.020345625446902382
          total_loss: 1.462253177165985
          vf_explained_var: 0.14214468002319336
          vf_loss: 1.4497641338242424
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,526,14616.7,526000,-24.534,-20.3,-26.9,245.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-21_23-49-39
  done: false
  episode_len_mean: 246.46
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.646000000000086
  episode_reward_min: -28.20000000000013
  episodes_this_iter: 4
  episodes_total: 1654
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4966049984483157e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8513734135362837
          entropy_coeff: 0.009999999999999998
          kl: 0.00824281447140268
          policy_loss: -0.009684192472034031
          total_loss: 1.4549203726980422
          vf_explained_var: 0.15442527830600739
          vf_loss: 1.473118323749966
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,527,14646.6,527000,-24.646,-20.3,-28.2,246.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-21_23-50-11
  done: false
  episode_len_mean: 247.08
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.70800000000008
  episode_reward_min: -28.20000000000013
  episodes_this_iter: 4
  episodes_total: 1658
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4966049984483157e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7953936027155982
          entropy_coeff: 0.009999999999999998
          kl: 0.051780864352241926
          policy_loss: -0.0076541561219427325
          total_loss: 1.2830533504486084
          vf_explained_var: 0.1978057622909546
          vf_loss: 1.29866142405404
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,528,14678.7,528000,-24.708,-20.3,-28.2,247.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-21_23-50-40
  done: false
  episode_len_mean: 248.64
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -24.864000000000082
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 3
  episodes_total: 1661
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.244907497672473e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0941008845965068
          entropy_coeff: 0.009999999999999998
          kl: 0.020184652784233727
          policy_loss: 0.05155388166507085
          total_loss: 0.9424259642759959
          vf_explained_var: 0.27916738390922546
          vf_loss: 0.9018130977120664
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,529,14707.8,529000,-24.864,-20.3,-29.4,248.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-21_23-51-07
  done: false
  episode_len_mean: 250.88
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -25.088000000000083
  episode_reward_min: -35.00000000000023
  episodes_this_iter: 3
  episodes_total: 1664
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3673612465087104e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2362881620724997
          entropy_coeff: 0.009999999999999998
          kl: 0.012220394188542805
          policy_loss: -0.06355163620577918
          total_loss: 1.1062552081214057
          vf_explained_var: 0.08696771413087845
          vf_loss: 1.1821697307957544
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,530,14734,530000,-25.088,-20.3,-35,250.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-21_23-51-34
  done: false
  episode_len_mean: 252.79
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -25.27900000000009
  episode_reward_min: -35.00000000000023
  episodes_this_iter: 4
  episodes_total: 1668
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3673612465087104e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0675137281417846
          entropy_coeff: 0.009999999999999998
          kl: 0.008954549427754286
          policy_loss: 0.018841525167226793
          total_loss: 1.3256609320640564
          vf_explained_var: 0.14973941445350647
          vf_loss: 1.317494551340739
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,531,14761.7,531000,-25.279,-20.3,-35,252.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-21_23-51-57
  done: false
  episode_len_mean: 255.36
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -25.536000000000094
  episode_reward_min: -35.00000000000023
  episodes_this_iter: 3
  episodes_total: 1671
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3673612465087104e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2831665211253696
          entropy_coeff: 0.009999999999999998
          kl: 0.013202517839485495
          policy_loss: 0.06084992272986306
          total_loss: 1.0048233820332422
          vf_explained_var: 0.2107832431793213
          vf_loss: 0.9568051267829206
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,532,14783.8,532000,-25.536,-20.3,-35,255.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-21_23-52-22
  done: false
  episode_len_mean: 257.99
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -25.799000000000095
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 3
  episodes_total: 1674
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3673612465087104e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3207649098502265
          entropy_coeff: 0.009999999999999998
          kl: 0.022721120840659064
          policy_loss: 0.09169071051809523
          total_loss: 0.82927399178346
          vf_explained_var: 0.11268123984336853
          vf_loss: 0.7507909401630362
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,533,14809.2,533000,-25.799,-20.3,-37.3,257.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-21_23-52-45
  done: false
  episode_len_mean: 261.63
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -26.163000000000107
  episode_reward_min: -39.20000000000029
  episodes_this_iter: 3
  episodes_total: 1677
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.051041869763066e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3310463428497314
          entropy_coeff: 0.009999999999999998
          kl: 0.0210216277947119
          policy_loss: 0.07685397060381041
          total_loss: 0.6803913613160452
          vf_explained_var: 0.145035982131958
          vf_loss: 0.6168478618065516
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,534,14832.5,534000,-26.163,-20.3,-39.2,261.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-21_23-53-08
  done: false
  episode_len_mean: 264.22
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -26.422000000000107
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 2
  episodes_total: 1679
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.576562804644598e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3405815535121495
          entropy_coeff: 0.009999999999999998
          kl: 0.009632754046565297
          policy_loss: -0.08365251322587332
          total_loss: 1.0124076972405116
          vf_explained_var: 0.2054925262928009
          vf_loss: 1.1094660384787454
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,535,14855.4,535000,-26.422,-20.3,-41.6,264.22




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-21_23-53-48
  done: false
  episode_len_mean: 266.61
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -26.66100000000011
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1682
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.576562804644598e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2192161785231697
          entropy_coeff: 0.009999999999999998
          kl: 0.022819067249862515
          policy_loss: -0.1035131734278467
          total_loss: 1.3980188541942173
          vf_explained_var: 0.1770923137664795
          vf_loss: 1.5137241813871596
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,536,14894.9,536000,-26.661,-20.3,-41.6,266.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-21_23-54-09
  done: false
  episode_len_mean: 269.27
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -26.927000000000117
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1685
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1364844206966896e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1707067198223537
          entropy_coeff: 0.009999999999999998
          kl: 0.06458925820288515
          policy_loss: -0.1025648419227865
          total_loss: 1.2321507347954643
          vf_explained_var: 0.28630587458610535
          vf_loss: 1.3464226338598464
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,537,14916,537000,-26.927,-20.3,-41.6,269.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-21_23-54-30
  done: false
  episode_len_mean: 271.83
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.183000000000117
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1688
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7047266310450347e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2233013841840956
          entropy_coeff: 0.009999999999999998
          kl: 0.009410397652010261
          policy_loss: -0.09521852665477329
          total_loss: 1.1940530604786344
          vf_explained_var: 0.24407316744327545
          vf_loss: 1.3015046053462558
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,538,14937.5,538000,-27.183,-20.3,-41.6,271.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-21_23-54-54
  done: false
  episode_len_mean: 275.16
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.516000000000123
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1691
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7047266310450347e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2134511894649929
          entropy_coeff: 0.009999999999999998
          kl: 0.01062885420660567
          policy_loss: 0.028712446325355107
          total_loss: 0.8801032698816723
          vf_explained_var: 0.24661751091480255
          vf_loss: 0.8635253434379896
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,539,14961.4,539000,-27.516,-20.3,-41.6,275.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-21_23-55-20
  done: false
  episode_len_mean: 277.41
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.74100000000012
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1694
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7047266310450347e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2292273574405246
          entropy_coeff: 0.009999999999999998
          kl: 0.08068325107939087
          policy_loss: -0.11202562244402038
          total_loss: 1.120972990989685
          vf_explained_var: 0.27487850189208984
          vf_loss: 1.24529087278578
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,540,14987.2,540000,-27.741,-20.3,-41.6,277.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-21_23-55-43
  done: false
  episode_len_mean: 280.83
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.08300000000013
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1697
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5570899465675516e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.333041290442149
          entropy_coeff: 0.009999999999999998
          kl: 0.015342245029578984
          policy_loss: -0.009953174160586463
          total_loss: 0.8544523490799798
          vf_explained_var: 0.283377468585968
          vf_loss: 0.8777359382973777
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,541,15010.3,541000,-28.083,-20.3,-41.6,280.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-21_23-56-04
  done: false
  episode_len_mean: 283.88
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.38800000000013
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 1700
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5570899465675516e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.402075669500563
          entropy_coeff: 0.009999999999999998
          kl: 0.1528398941411123
          policy_loss: 0.023519441650973427
          total_loss: 0.7541154368056191
          vf_explained_var: 0.2986619770526886
          vf_loss: 0.7446167634179195
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,542,15031.4,542000,-28.388,-20.3,-41.6,283.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-21_23-56-23
  done: false
  episode_len_mean: 287.18
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.71800000000014
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 2
  episodes_total: 1702
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.835634919851328e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5549171063635083
          entropy_coeff: 0.009999999999999998
          kl: 0.06849997536832436
          policy_loss: -0.08662858224577374
          total_loss: 0.6091037720441819
          vf_explained_var: 0.3734854459762573
          vf_loss: 0.7112815225082967
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,543,15050.4,543000,-28.718,-20.3,-41.6,287.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-21_23-56-42
  done: false
  episode_len_mean: 292.98
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.298000000000144
  episode_reward_min: -46.20000000000039
  episodes_this_iter: 3
  episodes_total: 1705
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.753452379776992e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6376563866933187
          entropy_coeff: 0.009999999999999998
          kl: 0.037034843585604334
          policy_loss: 0.06068334529797236
          total_loss: 0.838807002041075
          vf_explained_var: 0.08318377286195755
          vf_loss: 0.7945002332743671
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,544,15069.3,544000,-29.298,-20.3,-46.2,292.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-21_23-57-01
  done: false
  episode_len_mean: 296.96
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.696000000000154
  episode_reward_min: -46.20000000000039
  episodes_this_iter: 2
  episodes_total: 1707
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.630178569665487e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.780520588821835
          entropy_coeff: 0.009999999999999998
          kl: 0.10980358763157652
          policy_loss: 0.11853405386209488
          total_loss: 0.46672888696193693
          vf_explained_var: 0.10712242126464844
          vf_loss: 0.366000037205716
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,545,15087.7,545000,-29.696,-20.3,-46.2,296.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-21_23-57-17
  done: false
  episode_len_mean: 301.23
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.12300000000016
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 2
  episodes_total: 1709
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2945267854498228e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6587519923845926
          entropy_coeff: 0.009999999999999998
          kl: 0.030436978902781044
          policy_loss: -0.05832444594966041
          total_loss: 0.6934315315551228
          vf_explained_var: 0.2379687875509262
          vf_loss: 0.7683434901965989
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,546,15104.4,546000,-30.123,-20.3,-46.4,301.23




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-21_23-57-52
  done: false
  episode_len_mean: 305.01
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.50100000000016
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 2
  episodes_total: 1711
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9417901781747347e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.666498265001509
          entropy_coeff: 0.009999999999999998
          kl: 0.025065159122506012
          policy_loss: -0.06923409683836831
          total_loss: 0.6901814980639351
          vf_explained_var: 0.035694099962711334
          vf_loss: 0.7760805834912592
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,547,15138.9,547000,-30.501,-20.3,-46.5,305.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-21_23-58-11
  done: false
  episode_len_mean: 311.16
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.11600000000017
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1714
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9126852672621025e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6241243428654142
          entropy_coeff: 0.009999999999999998
          kl: 0.015144160174358332
          policy_loss: 0.032797908948527445
          total_loss: 0.9293456607394748
          vf_explained_var: 0.10238725692033768
          vf_loss: 0.9127889785501692
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,548,15157.5,548000,-31.116,-20.3,-46.5,311.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-21_23-58-28
  done: false
  episode_len_mean: 315.12
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.512000000000175
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 2
  episodes_total: 1716
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9126852672621025e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6891498274273342
          entropy_coeff: 0.009999999999999998
          kl: 0.03583488978756672
          policy_loss: 0.1193190990222825
          total_loss: 0.5457708203130298
          vf_explained_var: 0.13892744481563568
          vf_loss: 0.44334321399736737
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,549,15175,549000,-31.512,-20.3,-46.5,315.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-21_23-58-45
  done: false
  episode_len_mean: 319.01
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.90100000000018
  episode_reward_min: -47.1000000000004
  episodes_this_iter: 2
  episodes_total: 1718
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.369027900893155e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6118625733587477
          entropy_coeff: 0.009999999999999998
          kl: 0.032507981187659456
          policy_loss: 0.0028421036071247523
          total_loss: 0.808543555604087
          vf_explained_var: 0.030459141358733177
          vf_loss: 0.8218200821429491
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,550,15192.3,550000,-31.901,-20.3,-47.1,319.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-21_23-59-02
  done: false
  episode_len_mean: 324.87
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -32.487000000000194
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1720
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.436086466577318
          entropy_coeff: 0.009999999999999998
          kl: 0.011801197821183647
          policy_loss: 0.061383940196699564
          total_loss: 0.6515636675887637
          vf_explained_var: -0.24919234216213226
          vf_loss: 0.604540596022788
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,551,15209.1,551000,-32.487,-21.2,-54.4,324.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-21_23-59-19
  done: false
  episode_len_mean: 330.19
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -33.0190000000002
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1722
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4589793218506708
          entropy_coeff: 0.009999999999999998
          kl: 0.015272406702653996
          policy_loss: 0.10045513610045115
          total_loss: 0.7389232569270664
          vf_explained_var: -0.5822930932044983
          vf_loss: 0.653057919939359
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,552,15226,552000,-33.019,-21.2,-54.4,330.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-21_23-59-37
  done: false
  episode_len_mean: 335.26
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -33.52600000000021
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1724
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4911509950955708
          entropy_coeff: 0.009999999999999998
          kl: 0.019731992837575794
          policy_loss: 0.08674702180756463
          total_loss: 0.7593614975611369
          vf_explained_var: 0.19542868435382843
          vf_loss: 0.687525982161363
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,553,15243.3,553000,-33.526,-21.2,-54.4,335.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-21_23-59-54
  done: false
  episode_len_mean: 339.94
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -33.99400000000021
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1726
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.55354185133973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5292039699024624
          entropy_coeff: 0.009999999999999998
          kl: 0.02055512015704895
          policy_loss: 0.07071961594952478
          total_loss: 0.7715033084154129
          vf_explained_var: 0.052719321101903915
          vf_loss: 0.716075734142214
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,554,15261.1,554000,-33.994,-21.2,-54.4,339.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-22_00-00-10
  done: false
  episode_len_mean: 344.66
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -34.466000000000214
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1728
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.830312777009595e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.436224725511339
          entropy_coeff: 0.009999999999999998
          kl: 0.030281851723365113
          policy_loss: -0.022055636015203266
          total_loss: 0.6901725949512587
          vf_explained_var: -0.5496030449867249
          vf_loss: 0.7265904630844792
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,555,15276.7,555000,-34.466,-21.2,-54.4,344.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-22_00-00-28
  done: false
  episode_len_mean: 348.75
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -34.87500000000023
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1730
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4745469165514392e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5219359172715081
          entropy_coeff: 0.009999999999999998
          kl: 0.010164157991593944
          policy_loss: -0.0859201413889726
          total_loss: 1.298863379822837
          vf_explained_var: 0.03551148995757103
          vf_loss: 1.400002873937289
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,556,15294.8,556000,-34.875,-21.2,-54.4,348.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-22_00-00-45
  done: false
  episode_len_mean: 352.53
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -35.253000000000235
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1732
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4745469165514392e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4491368717617459
          entropy_coeff: 0.009999999999999998
          kl: 0.019609496500441952
          policy_loss: -0.07342030025190777
          total_loss: 1.2528471754656898
          vf_explained_var: -0.19362451136112213
          vf_loss: 1.3407588679757383
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,557,15311.5,557000,-35.253,-21.2,-54.4,352.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-22_00-01-02
  done: false
  episode_len_mean: 357.27
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -35.727000000000245
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1734
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4745469165514392e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4792431155840555
          entropy_coeff: 0.009999999999999998
          kl: 0.04190362138430992
          policy_loss: -0.08213168349530962
          total_loss: 1.2534666180610656
          vf_explained_var: -7.980068767210469e-05
          vf_loss: 1.3503907239271535
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,558,15328.3,558000,-35.727,-21.2,-54.4,357.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-22_00-01-20
  done: false
  episode_len_mean: 363.59
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -36.35900000000025
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1737
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.211820374827159e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3055422478251988
          entropy_coeff: 0.009999999999999998
          kl: 0.008949529125657918
          policy_loss: 0.03959773315323724
          total_loss: 1.3840286327732934
          vf_explained_var: 0.03166591376066208
          vf_loss: 1.357486327489217
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,559,15346.8,559000,-36.359,-21.2,-54.4,363.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-22_00-01-39
  done: false
  episode_len_mean: 367.42
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -36.74200000000026
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1739
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.211820374827159e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3079792698224386
          entropy_coeff: 0.009999999999999998
          kl: 0.019249594787331147
          policy_loss: 0.10924263662762113
          total_loss: 0.7680503971046871
          vf_explained_var: -0.2962726354598999
          vf_loss: 0.6718875384527362
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,560,15365.2,560000,-36.742,-21.2,-54.4,367.42




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-22_00-02-14
  done: false
  episode_len_mean: 371.05
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -37.10500000000026
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1741
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.211820374827159e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3692621641688876
          entropy_coeff: 0.009999999999999998
          kl: 0.014999188033152045
          policy_loss: -0.08404987454414367
          total_loss: 1.2329135298728944
          vf_explained_var: -0.12079071253538132
          vf_loss: 1.3306560239858098
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,561,15400.3,561000,-37.105,-21.2,-54.4,371.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-22_00-02-30
  done: false
  episode_len_mean: 375.6
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -37.560000000000265
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1743
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.211820374827159e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3534498704804314
          entropy_coeff: 0.009999999999999998
          kl: 0.023934436244974617
          policy_loss: -0.07389589862691032
          total_loss: 1.1450663109620411
          vf_explained_var: -0.13094045221805573
          vf_loss: 1.2324967176963886
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,562,15416.1,562000,-37.56,-21.2,-54.4,375.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-22_00-02-48
  done: false
  episode_len_mean: 379.59
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -37.95900000000027
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1745
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.317730562240739e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5079988718032837
          entropy_coeff: 0.009999999999999998
          kl: 0.05367687582895561
          policy_loss: -0.08424413982364866
          total_loss: 1.2954661889208687
          vf_explained_var: -0.10450031608343124
          vf_loss: 1.3947903186910682
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,563,15435,563000,-37.959,-21.2,-54.4,379.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-22_00-03-05
  done: false
  episode_len_mean: 385.15
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -38.515000000000285
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1748
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.9765958433611085e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4301829404301114
          entropy_coeff: 0.009999999999999998
          kl: 0.011283209381330019
          policy_loss: 0.04475436723894543
          total_loss: 1.3934236725171407
          vf_explained_var: 0.04431586712598801
          vf_loss: 1.362971121735043
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,564,15451.5,564000,-38.515,-21.2,-54.4,385.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-22_00-03-23
  done: false
  episode_len_mean: 389.93
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -38.993000000000286
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1750
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.9765958433611085e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5167020943429734
          entropy_coeff: 0.009999999999999998
          kl: 0.017461350247230712
          policy_loss: 0.10842810869216919
          total_loss: 0.7670166788829698
          vf_explained_var: -0.636353075504303
          vf_loss: 0.673755580580069
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,565,15469.5,565000,-38.993,-24.8,-54.4,389.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-22_00-03-41
  done: false
  episode_len_mean: 393.42
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -39.34200000000029
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1752
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.9765958433611085e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6275703655348883
          entropy_coeff: 0.009999999999999998
          kl: 0.031864487589901576
          policy_loss: 0.049688614077038236
          total_loss: 0.8108535826206207
          vf_explained_var: -0.15302559733390808
          vf_loss: 0.7774406796528234
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,566,15487.7,566000,-39.342,-24.8,-54.4,393.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-22_00-03-59
  done: false
  episode_len_mean: 396.9
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -39.690000000000296
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1754
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.464893765041661e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.535155063205295
          entropy_coeff: 0.009999999999999998
          kl: 0.016046912974265127
          policy_loss: -0.08541865762737062
          total_loss: 1.3262737929821014
          vf_explained_var: -0.24195775389671326
          vf_loss: 1.4270440046158102
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,567,15505.7,567000,-39.69,-24.8,-54.4,396.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-22_00-04-18
  done: false
  episode_len_mean: 401.19
  episode_media: {}
  episode_reward_max: -25.600000000000094
  episode_reward_mean: -40.1190000000003
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1757
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.464893765041661e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.628016828166114
          entropy_coeff: 0.009999999999999998
          kl: 0.010942340594654985
          policy_loss: 0.03591940560274654
          total_loss: 1.3694099211030537
          vf_explained_var: -0.15294986963272095
          vf_loss: 1.349770679904355
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,568,15524.8,568000,-40.119,-25.6,-54.4,401.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-22_00-04-36
  done: false
  episode_len_mean: 404.73
  episode_media: {}
  episode_reward_max: -26.0000000000001
  episode_reward_mean: -40.473000000000305
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1759
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.464893765041661e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.512498856915368
          entropy_coeff: 0.009999999999999998
          kl: 0.015613545329099171
          policy_loss: -0.08811020404100418
          total_loss: 1.2788775563240051
          vf_explained_var: 0.07355441153049469
          vf_loss: 1.3821127302116818
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,569,15542,569000,-40.473,-26,-54.4,404.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-22_00-04-54
  done: false
  episode_len_mean: 408.59
  episode_media: {}
  episode_reward_max: -27.40000000000012
  episode_reward_mean: -40.85900000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1762
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.464893765041661e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5438748386171128
          entropy_coeff: 0.009999999999999998
          kl: 0.016336797839277196
          policy_loss: 0.04083659383985731
          total_loss: 1.3452104571792813
          vf_explained_var: -0.23239989578723907
          vf_loss: 1.3198125809431076
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,570,15560.7,570000,-40.859,-27.4,-54.4,408.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-22_00-05-16
  done: false
  episode_len_mean: 409.54
  episode_media: {}
  episode_reward_max: -27.40000000000012
  episode_reward_mean: -40.95400000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1764
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.464893765041661e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5778353717592029
          entropy_coeff: 0.009999999999999998
          kl: 0.023935823016702114
          policy_loss: -0.09815966288248698
          total_loss: 1.1515435381068124
          vf_explained_var: -0.11688614636659622
          vf_loss: 1.2654815582765473
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,571,15582.1,571000,-40.954,-27.4,-54.4,409.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-22_00-05-36
  done: false
  episode_len_mean: 412.45
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -41.245000000000324
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1767
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1197340647562493e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4909337162971497
          entropy_coeff: 0.009999999999999998
          kl: 0.03231880470600304
          policy_loss: 0.059094665282302436
          total_loss: 1.2085572808980942
          vf_explained_var: 0.0822174996137619
          vf_loss: 1.1643719553119607
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,572,15601.9,572000,-41.245,-28.7,-54.4,412.45




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-22_00-06-14
  done: false
  episode_len_mean: 412.87
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -41.287000000000326
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1770
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.679601097134374e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3741791155603198
          entropy_coeff: 0.009999999999999998
          kl: 0.035997203349883
          policy_loss: 0.06114329902662171
          total_loss: 1.1428862422704698
          vf_explained_var: 0.3845520317554474
          vf_loss: 1.0954847269588046
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,573,15640.6,573000,-41.287,-28.7,-54.4,412.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-22_00-06-38
  done: false
  episode_len_mean: 412.37
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -41.237000000000315
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1773
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.519401645701561e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3253260135650635
          entropy_coeff: 0.009999999999999998
          kl: 0.014023128702333389
          policy_loss: 0.05570888171593348
          total_loss: 1.1423912329806223
          vf_explained_var: 0.3906950354576111
          vf_loss: 1.0999356265076332
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,574,15664.2,574000,-41.237,-28.7,-54.4,412.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-22_00-07-05
  done: false
  episode_len_mean: 411.59
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -41.15900000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1776
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.519401645701561e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.326733852757348
          entropy_coeff: 0.009999999999999998
          kl: 0.01300291532202667
          policy_loss: -0.07941075704163975
          total_loss: 1.5325190795792474
          vf_explained_var: 0.2487923949956894
          vf_loss: 1.625197164217631
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,575,15691.2,575000,-41.159,-28.7,-54.4,411.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-22_00-07-29
  done: false
  episode_len_mean: 409.47
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -40.947000000000315
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1779
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.519401645701561e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2485349853833516
          entropy_coeff: 0.009999999999999998
          kl: 0.026931634863264976
          policy_loss: -0.12314676940441131
          total_loss: 1.304299247264862
          vf_explained_var: 0.3929651379585266
          vf_loss: 1.439931367503272
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,576,15715.2,576000,-40.947,-28.7,-54.4,409.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-22_00-07-51
  done: false
  episode_len_mean: 409.34
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.93400000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 4
  episodes_total: 1783
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7791024685523415e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2227098968293932
          entropy_coeff: 0.009999999999999998
          kl: 0.02311168747920686
          policy_loss: 0.012851853751473956
          total_loss: 1.1869569334718917
          vf_explained_var: 0.4211641848087311
          vf_loss: 1.1863321661949158
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,577,15737.6,577000,-40.934,-28.8,-54.4,409.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-22_00-08-14
  done: false
  episode_len_mean: 408.28
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.828000000000316
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1786
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.668653702828513e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2493354360262552
          entropy_coeff: 0.009999999999999998
          kl: 0.08746134239426229
          policy_loss: 0.05841721172134082
          total_loss: 0.8945322023497687
          vf_explained_var: 0.5673578977584839
          vf_loss: 0.8486082887070047
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,578,15760.4,578000,-40.828,-28.8,-54.4,408.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-22_00-08-39
  done: false
  episode_len_mean: 407.84
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.78400000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1789
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.502980554242768e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.336935359901852
          entropy_coeff: 0.009999999999999998
          kl: 0.04252340289063454
          policy_loss: 0.07258206026421653
          total_loss: 0.9400265428755018
          vf_explained_var: 0.3213171362876892
          vf_loss: 0.880813809732596
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,579,15785.1,579000,-40.784,-28.8,-54.4,407.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-22_00-09-04
  done: false
  episode_len_mean: 407.66
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.76600000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1792
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2754470831364154e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3974122378561231
          entropy_coeff: 0.009999999999999998
          kl: 0.02043160971351886
          policy_loss: 0.10170177751117282
          total_loss: 1.06676856511169
          vf_explained_var: 0.3280476927757263
          vf_loss: 0.979040887289577
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,580,15809.9,580000,-40.766,-28.8,-54.4,407.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-22_00-09-27
  done: false
  episode_len_mean: 409.13
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.91300000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1795
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.913170624704622e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3338768270280625
          entropy_coeff: 0.009999999999999998
          kl: 0.020531138571330054
          policy_loss: 0.03908217681778802
          total_loss: 0.9702891111373901
          vf_explained_var: 0.0995730385184288
          vf_loss: 0.944545661078559
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,581,15832.9,581000,-40.913,-28.8,-54.4,409.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-22_00-09-49
  done: false
  episode_len_mean: 409.44
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.94400000000032
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1797
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8697559370569348e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3446686877144707
          entropy_coeff: 0.009999999999999998
          kl: 0.02153563997558747
          policy_loss: -0.08913253529204263
          total_loss: 0.9530868877967199
          vf_explained_var: 0.18783244490623474
          vf_loss: 1.0556660398840905
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,582,15855.4,582000,-40.944,-28.8,-54.4,409.44




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-22_00-10-26
  done: false
  episode_len_mean: 409.64
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.96400000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1800
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.304633905585402e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3128033796946208
          entropy_coeff: 0.009999999999999998
          kl: 0.01625245391863004
          policy_loss: -0.09844083132015334
          total_loss: 1.447709822654724
          vf_explained_var: 0.06089448556303978
          vf_loss: 1.5592786126666598
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,583,15892.5,583000,-40.964,-28.8,-54.4,409.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-22_00-10-48
  done: false
  episode_len_mean: 408.61
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.86100000000031
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1803
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.304633905585402e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2782277319166395
          entropy_coeff: 0.009999999999999998
          kl: 0.0090356340236379
          policy_loss: 0.0500746210416158
          total_loss: 1.122684227426847
          vf_explained_var: 0.00606354558840394
          vf_loss: 1.0853918453057607
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,584,15914.3,584000,-40.861,-28.8,-54.4,408.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-22_00-11-08
  done: false
  episode_len_mean: 407.66
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.766000000000304
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1805
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.304633905585402e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.194243848323822
          entropy_coeff: 0.009999999999999998
          kl: 0.013041392090806871
          policy_loss: -0.08422293232546912
          total_loss: 0.9921491427554024
          vf_explained_var: -0.21744181215763092
          vf_loss: 1.088314467171828
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,585,15933.6,585000,-40.766,-28.8,-54.4,407.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-22_00-11-26
  done: false
  episode_len_mean: 406.05
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.6050000000003
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1808
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.304633905585402e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2987071765793694
          entropy_coeff: 0.009999999999999998
          kl: 0.042037217380788326
          policy_loss: 0.047288357300890815
          total_loss: 1.1694152341948616
          vf_explained_var: 0.06222612038254738
          vf_loss: 1.1351137787103653
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,586,15952.3,586000,-40.605,-28.8,-54.4,406.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-22_00-11-46
  done: false
  episode_len_mean: 404.96
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.4960000000003
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 2
  episodes_total: 1810
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.4569508583781e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1965606556998358
          entropy_coeff: 0.009999999999999998
          kl: 0.014725337127647247
          policy_loss: -0.08291154089901183
          total_loss: 1.014268116487397
          vf_explained_var: 0.06013857573270798
          vf_loss: 1.1091451560457548
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,587,15972.4,587000,-40.496,-28.8,-54.4,404.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-22_00-12-08
  done: false
  episode_len_mean: 402.01
  episode_media: {}
  episode_reward_max: -28.80000000000014
  episode_reward_mean: -40.201000000000306
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 3
  episodes_total: 1813
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.4569508583781e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.982834396759669
          entropy_coeff: 0.009999999999999998
          kl: 0.06884199893084712
          policy_loss: -0.09081365060475137
          total_loss: 1.3266390906439887
          vf_explained_var: 0.13418538868427277
          vf_loss: 1.4272806419266595
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,588,15994.2,588000,-40.201,-28.8,-54.4,402.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-22_00-12-32
  done: false
  episode_len_mean: 396.12
  episode_media: {}
  episode_reward_max: -28.000000000000128
  episode_reward_mean: -39.612000000000286
  episode_reward_min: -54.4000000000005
  episodes_this_iter: 4
  episodes_total: 1817
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.685426287567152e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9081032335758209
          entropy_coeff: 0.009999999999999998
          kl: 0.060632847113788677
          policy_loss: 0.009263456033335792
          total_loss: 1.3038354893525441
          vf_explained_var: 0.31638795137405396
          vf_loss: 1.303652486536238
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,589,16018.3,589000,-39.612,-28,-54.4,396.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-22_00-13-01
  done: false
  episode_len_mean: 389.41
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -38.94100000000029
  episode_reward_min: -53.100000000000485
  episodes_this_iter: 3
  episodes_total: 1820
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4528139431350726e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9577628831068675
          entropy_coeff: 0.009999999999999998
          kl: 0.05432663272787508
          policy_loss: -0.08704403738180796
          total_loss: 0.9398365623421139
          vf_explained_var: 0.30936869978904724
          vf_loss: 1.0364574465486738
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,590,16046.5,590000,-38.941,-26.8,-53.1,389.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-22_00-13-25
  done: false
  episode_len_mean: 380.25
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -38.02500000000027
  episode_reward_min: -49.700000000000436
  episodes_this_iter: 4
  episodes_total: 1824
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1792209147026088e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0205502834584979
          entropy_coeff: 0.009999999999999998
          kl: 0.043668755319800176
          policy_loss: 0.03558703594737583
          total_loss: 1.1018722891807555
          vf_explained_var: 0.36658376455307007
          vf_loss: 1.0764897932608923
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,591,16071.2,591000,-38.025,-26.8,-49.7,380.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-22_00-13-47
  done: false
  episode_len_mean: 375.66
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -37.566000000000265
  episode_reward_min: -49.700000000000436
  episodes_this_iter: 3
  episodes_total: 1827
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.268831372053914e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0726047959592608
          entropy_coeff: 0.009999999999999998
          kl: 0.01606147655138758
          policy_loss: 0.08915666780538029
          total_loss: 0.9624655038118363
          vf_explained_var: 0.2563430070877075
          vf_loss: 0.8840343601587746
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,592,16093.2,592000,-37.566,-26.8,-49.7,375.66




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-22_00-14-28
  done: false
  episode_len_mean: 370.91
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -37.09100000000026
  episode_reward_min: -49.700000000000436
  episodes_this_iter: 3
  episodes_total: 1830
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.268831372053914e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.103860370318095
          entropy_coeff: 0.009999999999999998
          kl: 0.023347978703108337
          policy_loss: 0.04196588951680395
          total_loss: 1.0491014026933245
          vf_explained_var: 0.19263994693756104
          vf_loss: 1.0181733436882496
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,593,16133.5,593000,-37.091,-26.8,-49.7,370.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-22_00-14-53
  done: false
  episode_len_mean: 366.65
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -36.665000000000255
  episode_reward_min: -49.700000000000436
  episodes_this_iter: 3
  episodes_total: 1833
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.90324705808087e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9325935218069289
          entropy_coeff: 0.009999999999999998
          kl: 0.018052356496282427
          policy_loss: -0.09950500544574525
          total_loss: 1.3463032020462884
          vf_explained_var: 0.09965737909078598
          vf_loss: 1.4551332632700602
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,594,16158.7,594000,-36.665,-26.8,-49.7,366.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-22_00-15-19
  done: false
  episode_len_mean: 359.16
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -35.91600000000024
  episode_reward_min: -49.000000000000426
  episodes_this_iter: 4
  episodes_total: 1837
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.90324705808087e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9037302712599437
          entropy_coeff: 0.009999999999999998
          kl: 0.02328189608291227
          policy_loss: 0.04165364056825638
          total_loss: 1.2982393582661946
          vf_explained_var: 0.15438620746135712
          vf_loss: 1.2656218859884474
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,595,16185.1,595000,-35.916,-26.8,-49,359.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-22_00-15-45
  done: false
  episode_len_mean: 354.48
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -35.44800000000023
  episode_reward_min: -49.000000000000426
  episodes_this_iter: 3
  episodes_total: 1840
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8634338094128503
          entropy_coeff: 0.009999999999999998
          kl: 0.010896601318345094
          policy_loss: -0.11829453516337607
          total_loss: 1.1605084147718219
          vf_explained_var: 0.24523043632507324
          vf_loss: 1.2874364892641703
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,596,16210.9,596000,-35.448,-26.8,-49,354.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-22_00-16-10
  done: false
  episode_len_mean: 347.44
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -34.74400000000022
  episode_reward_min: -48.50000000000042
  episodes_this_iter: 4
  episodes_total: 1844
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8105843040678237
          entropy_coeff: 0.009999999999999998
          kl: 0.017621519477327683
          policy_loss: 0.04483184383975135
          total_loss: 1.0640423039595286
          vf_explained_var: 0.3861566185951233
          vf_loss: 1.0273150119516585
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,597,16235.3,597000,-34.744,-26.8,-48.5,347.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-22_00-16-36
  done: false
  episode_len_mean: 342.48
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -34.24800000000022
  episode_reward_min: -47.0000000000004
  episodes_this_iter: 3
  episodes_total: 1847
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9494138552082909
          entropy_coeff: 0.009999999999999998
          kl: 0.011533921963649933
          policy_loss: -0.032670188115702735
          total_loss: 0.9535549713505639
          vf_explained_var: 0.2617640495300293
          vf_loss: 0.9957184559769101
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,598,16261.5,598000,-34.248,-26.8,-47,342.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-22_00-16-59
  done: false
  episode_len_mean: 338.11
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -33.81100000000021
  episode_reward_min: -46.800000000000395
  episodes_this_iter: 3
  episodes_total: 1850
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8407783879174127
          entropy_coeff: 0.009999999999999998
          kl: 0.015471313861802068
          policy_loss: -0.13541981329520544
          total_loss: 0.9861178186204699
          vf_explained_var: 0.317010760307312
          vf_loss: 1.1299442887306212
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,599,16284.7,599000,-33.811,-26.8,-46.8,338.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-22_00-17-22
  done: false
  episode_len_mean: 332.54
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -33.2540000000002
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 4
  episodes_total: 1854
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8502962483300103
          entropy_coeff: 0.009999999999999998
          kl: 0.014488134188925628
          policy_loss: 0.04621132901973195
          total_loss: 1.006118714147144
          vf_explained_var: 0.34740766882896423
          vf_loss: 0.9684092932277255
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,600,16307.6,600000,-33.254,-26.8,-46.3,332.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-22_00-17-45
  done: false
  episode_len_mean: 330.0
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -33.000000000000206
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 3
  episodes_total: 1857
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.354870587121304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9884119364950392
          entropy_coeff: 0.009999999999999998
          kl: 0.05641123188120226
          policy_loss: 0.06168238057030572
          total_loss: 0.9515102005667156
          vf_explained_var: 0.41090694069862366
          vf_loss: 0.8997078025092681
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,601,16330.8,601000,-33,-26.8,-46.3,330


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-22_00-18-05
  done: false
  episode_len_mean: 329.25
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -32.9250000000002
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 2
  episodes_total: 1859
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011032305880681958
          cur_lr: 5.000000000000001e-05
          entropy: 0.7408214383655124
          entropy_coeff: 0.009999999999999998
          kl: 0.013492646631282568
          policy_loss: -0.06487664961152606
          total_loss: 1.0323133438825607
          vf_explained_var: 0.08448361605405807
          vf_loss: 1.1045967214637333
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,602,16350.4,602000,-32.925,-26.8,-43.5,329.25




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-22_00-18-45
  done: false
  episode_len_mean: 326.26
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -32.6260000000002
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1863
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011032305880681958
          cur_lr: 5.000000000000001e-05
          entropy: 0.7931122024854024
          entropy_coeff: 0.009999999999999998
          kl: 0.05370561468766704
          policy_loss: 0.011716842444406616
          total_loss: 1.2873575614558326
          vf_explained_var: 0.2192991077899933
          vf_loss: 1.2835659119817946
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,603,16390.6,603000,-32.626,-26.8,-43.5,326.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-22_00-19-13
  done: false
  episode_len_mean: 323.69
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -32.369000000000185
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 1866
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.7158391336599986
          entropy_coeff: 0.009999999999999998
          kl: 0.013512591841522455
          policy_loss: 0.04405156672000885
          total_loss: 0.9394979430569543
          vf_explained_var: 0.3815585970878601
          vf_loss: 0.9026025376386113
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,604,16418.2,604000,-32.369,-26.8,-43.5,323.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-22_00-19-38
  done: false
  episode_len_mean: 320.86
  episode_media: {}
  episode_reward_max: -26.80000000000011
  episode_reward_mean: -32.08600000000018
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1870
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.6672622481981914
          entropy_coeff: 0.009999999999999998
          kl: 0.015824171152688395
          policy_loss: 0.022591713815927505
          total_loss: 1.1425675637192196
          vf_explained_var: 0.26742836833000183
          vf_loss: 1.1266458537843493
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,605,16443.5,605000,-32.086,-26.8,-43.5,320.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-22_00-20-04
  done: false
  episode_len_mean: 319.49
  episode_media: {}
  episode_reward_max: -26.600000000000108
  episode_reward_mean: -31.94900000000018
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 1873
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.6279500759310193
          entropy_coeff: 0.009999999999999998
          kl: 0.018070710569365486
          policy_loss: 0.020851690073808035
          total_loss: 0.9338222003645367
          vf_explained_var: 0.39677557349205017
          vf_loss: 0.9192470146550072
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,606,16469.5,606000,-31.949,-26.6,-43.5,319.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-22_00-20-31
  done: false
  episode_len_mean: 317.69
  episode_media: {}
  episode_reward_max: -25.800000000000097
  episode_reward_mean: -31.769000000000183
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1877
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.5534189638164309
          entropy_coeff: 0.009999999999999998
          kl: 0.010368706000348421
          policy_loss: 0.014807692087358899
          total_loss: 1.1557437234454684
          vf_explained_var: 0.3634195029735565
          vf_loss: 1.1464685214890373
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,607,16496.9,607000,-31.769,-25.8,-43.5,317.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-22_00-20-58
  done: false
  episode_len_mean: 315.6
  episode_media: {}
  episode_reward_max: -25.800000000000097
  episode_reward_mean: -31.560000000000176
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1881
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.48069574667347803
          entropy_coeff: 0.009999999999999998
          kl: 0.011528598389155004
          policy_loss: 0.025579603678650326
          total_loss: 1.0607825444804297
          vf_explained_var: 0.4310145378112793
          vf_loss: 1.0400079879495832
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,608,16523.4,608000,-31.56,-25.8,-43.5,315.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-22_00-21-27
  done: false
  episode_len_mean: 313.23
  episode_media: {}
  episode_reward_max: -25.500000000000092
  episode_reward_mean: -31.323000000000174
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1885
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016548458821022942
          cur_lr: 5.000000000000001e-05
          entropy: 0.4270535820060306
          entropy_coeff: 0.009999999999999998
          kl: 0.0032845829519231986
          policy_loss: 0.021686836497651206
          total_loss: 1.0735629949304792
          vf_explained_var: 0.4080318510532379
          vf_loss: 1.0561461495028601
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,609,16552.2,609000,-31.323,-25.5,-43.5,313.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-22_00-21-56
  done: false
  episode_len_mean: 310.82
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -31.08200000000017
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1889
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.274229410511471e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3843853066364924
          entropy_coeff: 0.009999999999999998
          kl: 0.00984583100905662
          policy_loss: -0.005602668681078487
          total_loss: 1.140189570850796
          vf_explained_var: 0.3059232831001282
          vf_loss: 1.1496352738804287
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,610,16581.5,610000,-31.082,-25.3,-43.5,310.82




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-22_00-22-39
  done: false
  episode_len_mean: 309.32
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -30.932000000000166
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 1892
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.274229410511471e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4167150169610977
          entropy_coeff: 0.009999999999999998
          kl: 0.023322827164375327
          policy_loss: 0.0452487352821562
          total_loss: 0.9755898538562987
          vf_explained_var: 0.265726774930954
          vf_loss: 0.9345063329156902
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,611,16625,611000,-30.932,-23,-43.5,309.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-22_00-23-06
  done: false
  episode_len_mean: 306.1
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -30.61000000000017
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1896
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000124113441157672
          cur_lr: 5.000000000000001e-05
          entropy: 0.40477831628587513
          entropy_coeff: 0.009999999999999998
          kl: 0.025844602099776952
          policy_loss: 0.007496189946929614
          total_loss: 1.267408009370168
          vf_explained_var: 0.28454530239105225
          vf_loss: 1.2639563918113708
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,612,16651.9,612000,-30.61,-23,-43.5,306.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-22_00-23-36
  done: false
  episode_len_mean: 302.31
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -30.23100000000016
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 1900
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001861701617365081
          cur_lr: 5.000000000000001e-05
          entropy: 0.42964932951662277
          entropy_coeff: 0.009999999999999998
          kl: 0.01571857526139089
          policy_loss: -0.002976635181241565
          total_loss: 1.2371148261759015
          vf_explained_var: 0.2893393039703369
          vf_loss: 1.2443850232495202
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,613,16681.2,613000,-30.231,-23,-43.5,302.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-22_00-24-04
  done: false
  episode_len_mean: 298.6
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -29.86000000000015
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 1903
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001861701617365081
          cur_lr: 5.000000000000001e-05
          entropy: 0.3727250393893984
          entropy_coeff: 0.009999999999999998
          kl: 0.01555954053026609
          policy_loss: -0.12156415275401539
          total_loss: 0.8598249753316244
          vf_explained_var: 0.5162277817726135
          vf_loss: 0.9851134730709924
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,614,16709.3,614000,-29.86,-23,-43.5,298.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-22_00-24-33
  done: false
  episode_len_mean: 291.17
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -29.117000000000143
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 5
  episodes_total: 1908
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001861701617365081
          cur_lr: 5.000000000000001e-05
          entropy: 0.3477353854311837
          entropy_coeff: 0.009999999999999998
          kl: 0.02049957235665387
          policy_loss: -0.008445813175704744
          total_loss: 0.9447185748153263
          vf_explained_var: 0.5448678135871887
          vf_loss: 0.9566379202736749
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,615,16738.9,615000,-29.117,-23,-42.1,291.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-22_00-25-02
  done: false
  episode_len_mean: 287.4
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -28.740000000000133
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 3
  episodes_total: 1911
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00027925524260476213
          cur_lr: 5.000000000000001e-05
          entropy: 0.3457397199339337
          entropy_coeff: 0.009999999999999998
          kl: 0.004833264425135297
          policy_loss: -0.11047169673773978
          total_loss: 0.7508512629403008
          vf_explained_var: 0.5457931160926819
          vf_loss: 0.8647790127330356
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,616,16767.1,616000,-28.74,-23,-42.1,287.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-22_00-25-30
  done: false
  episode_len_mean: 284.39
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -28.43900000000013
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1915
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00013962762130238107
          cur_lr: 5.000000000000001e-05
          entropy: 0.3170499684082137
          entropy_coeff: 0.009999999999999998
          kl: 0.008307653581606504
          policy_loss: -0.028881810688310198
          total_loss: 0.7582945517367787
          vf_explained_var: 0.5911855101585388
          vf_loss: 0.7903457009130054
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,617,16795.2,617000,-28.439,-23,-42.1,284.39




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-22_00-26-14
  done: false
  episode_len_mean: 282.15
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.21500000000013
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 5
  episodes_total: 1920
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00013962762130238107
          cur_lr: 5.000000000000001e-05
          entropy: 0.3283398909701241
          entropy_coeff: 0.009999999999999998
          kl: 0.010676619302420913
          policy_loss: -0.01679866313934326
          total_loss: 1.2974379910363092
          vf_explained_var: 0.340374618768692
          vf_loss: 1.317518558104833
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,618,16839.8,618000,-28.215,-21.8,-42.1,282.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-22_00-26-43
  done: false
  episode_len_mean: 280.96
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.09600000000013
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1924
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00013962762130238107
          cur_lr: 5.000000000000001e-05
          entropy: 0.32100494222508535
          entropy_coeff: 0.009999999999999998
          kl: 0.004162673271718139
          policy_loss: 0.015315684924523036
          total_loss: 0.9417958160241445
          vf_explained_var: 0.3990440368652344
          vf_loss: 0.92968959874577
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,619,16867.9,619000,-28.096,-21.8,-42.1,280.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-22_00-27-10
  done: false
  episode_len_mean: 279.01
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.901000000000128
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 3
  episodes_total: 1927
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.981381065119053e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.354230206211408
          entropy_coeff: 0.009999999999999998
          kl: 0.005224087523867856
          policy_loss: -0.04212244732512368
          total_loss: 0.6477083282338248
          vf_explained_var: 0.5539608001708984
          vf_loss: 0.6933727126982477
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,620,16895.1,620000,-27.901,-21.8,-42.1,279.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-22_00-27-38
  done: false
  episode_len_mean: 276.4
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.64000000000013
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1931
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.981381065119053e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2938838536540667
          entropy_coeff: 0.009999999999999998
          kl: 0.006299317291145693
          policy_loss: -0.1132171697086758
          total_loss: 1.3132125046518115
          vf_explained_var: 0.32277122139930725
          vf_loss: 1.4293680601649814
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,621,16923.5,621000,-27.64,-21.8,-42.1,276.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-22_00-28-07
  done: false
  episode_len_mean: 274.36
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.436000000000117
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1935
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.981381065119053e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2861763884623845
          entropy_coeff: 0.009999999999999998
          kl: 0.002387372013786957
          policy_loss: -0.11112508318490452
          total_loss: 1.2922656337420146
          vf_explained_var: 0.2846880555152893
          vf_loss: 1.4062523047129314
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,622,16951.8,622000,-27.436,-21.8,-42.1,274.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-22_00-28-35
  done: false
  episode_len_mean: 273.12
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.312000000000115
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1939
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.490690532559527e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3626300338241789
          entropy_coeff: 0.009999999999999998
          kl: 0.02563160064928809
          policy_loss: -0.10883915672699611
          total_loss: 1.3235601292716133
          vf_explained_var: 0.30531734228134155
          vf_loss: 1.4360246790779962
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,623,16980.6,623000,-27.312,-21.8,-42.1,273.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-22_00-29-05
  done: false
  episode_len_mean: 271.07
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.107000000000117
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 5
  episodes_total: 1944
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.23603579883929e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.27945950660440655
          entropy_coeff: 0.009999999999999998
          kl: 0.006398569163739549
          policy_loss: -0.03707077933682336
          total_loss: 1.1920459396309322
          vf_explained_var: 0.41105905175209045
          vf_loss: 1.2319109777609507
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,624,17010.6,624000,-27.107,-21.8,-42.1,271.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-22_00-29-33
  done: false
  episode_len_mean: 269.22
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.92200000000011
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1948
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.23603579883929e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4375580115450753
          entropy_coeff: 0.009999999999999998
          kl: 0.033784500351982966
          policy_loss: -0.010114349590407478
          total_loss: 0.9359082930617862
          vf_explained_var: 0.4136567711830139
          vf_loss: 0.9503964629438189
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,625,17038.4,625000,-26.922,-21.8,-42.1,269.22




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-22_00-30-19
  done: false
  episode_len_mean: 266.06
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -26.60600000000011
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1952
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.854053698258935e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.27632433192597494
          entropy_coeff: 0.009999999999999998
          kl: 0.006606493948885136
          policy_loss: 0.03883100060953034
          total_loss: 0.9965732998318142
          vf_explained_var: 0.32999759912490845
          vf_loss: 0.9605050259166293
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,626,17084.6,626000,-26.606,-19.1,-42.1,266.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-22_00-30-50
  done: false
  episode_len_mean: 263.29
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -26.329000000000107
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 1956
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.854053698258935e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.29433247695366543
          entropy_coeff: 0.009999999999999998
          kl: 0.026519942840068467
          policy_loss: 0.02547022319502301
          total_loss: 0.8894039531548817
          vf_explained_var: 0.33458006381988525
          vf_loss: 0.8668749666876263
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,627,17115,627000,-26.329,-19.1,-42.1,263.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-22_00-31-21
  done: false
  episode_len_mean: 255.95
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.59500000000009
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 5
  episodes_total: 1961
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011781080547388399
          cur_lr: 5.000000000000001e-05
          entropy: 0.36126635753446157
          entropy_coeff: 0.009999999999999998
          kl: 0.018158871659084575
          policy_loss: -0.012659227599700291
          total_loss: 1.5165848520067002
          vf_explained_var: 0.04899005964398384
          vf_loss: 1.5328545954492356
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,628,17146.1,628000,-25.595,-19.1,-32,255.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-22_00-31-49
  done: false
  episode_len_mean: 254.9
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.490000000000094
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 3
  episodes_total: 1964
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011781080547388399
          cur_lr: 5.000000000000001e-05
          entropy: 0.46624340878592596
          entropy_coeff: 0.009999999999999998
          kl: 0.003962658026392419
          policy_loss: -0.08905499213271671
          total_loss: 1.076530887020959
          vf_explained_var: 0.035248614847660065
          vf_loss: 1.1702478342586093
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,629,17173.9,629000,-25.49,-19.1,-31.8,254.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-22_00-32-15
  done: false
  episode_len_mean: 254.19
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.419000000000093
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1968
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.8905402736941994e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.44752092990610337
          entropy_coeff: 0.009999999999999998
          kl: 0.008845842938545997
          policy_loss: -0.001834271682633294
          total_loss: 1.1684474402003817
          vf_explained_var: 0.11205977201461792
          vf_loss: 1.1747563891940647
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,630,17199.9,630000,-25.419,-19.1,-31.8,254.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-22_00-32-43
  done: false
  episode_len_mean: 253.45
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.34500000000009
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1972
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.8905402736941994e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6037022285991245
          entropy_coeff: 0.009999999999999998
          kl: 0.032233088349604186
          policy_loss: 0.008956392109394074
          total_loss: 1.1117040481832292
          vf_explained_var: 0.16596168279647827
          vf_loss: 1.1087827781836193
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,631,17228.2,631000,-25.345,-19.1,-31.8,253.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-22_00-33-09
  done: false
  episode_len_mean: 253.29
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.329000000000082
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1976
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.835810410541303e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4673907819721434
          entropy_coeff: 0.009999999999999998
          kl: 0.031529929297018415
          policy_loss: 0.019112190438641444
          total_loss: 1.0339708824952443
          vf_explained_var: 0.3086525499820709
          vf_loss: 1.0195298241244422
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,632,17254.5,632000,-25.329,-19.1,-31.8,253.29




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-22_00-33-54
  done: false
  episode_len_mean: 252.21
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.221000000000096
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1980
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001325371561581195
          cur_lr: 5.000000000000001e-05
          entropy: 0.4235219852791892
          entropy_coeff: 0.009999999999999998
          kl: 0.0039060728440148294
          policy_loss: 0.011115782211224239
          total_loss: 1.1199549833933513
          vf_explained_var: 0.23299939930438995
          vf_loss: 1.1130738933881124
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,633,17298.8,633000,-25.221,-19.1,-31.8,252.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-22_00-34-23
  done: false
  episode_len_mean: 252.26
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.226000000000095
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1984
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.626857807905976e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4422829488913218
          entropy_coeff: 0.009999999999999998
          kl: 0.039199634735711364
          policy_loss: 0.037117448697487516
          total_loss: 1.1743963009781309
          vf_explained_var: 0.2333465963602066
          vf_loss: 1.141699080997043
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,634,17327.9,634000,-25.226,-19.1,-31.8,252.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-22_00-34-52
  done: false
  episode_len_mean: 251.9
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.19000000000009
  episode_reward_min: -31.800000000000182
  episodes_this_iter: 4
  episodes_total: 1988
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.940286711858963e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.30434683296415543
          entropy_coeff: 0.009999999999999998
          kl: 0.012354805070315973
          policy_loss: 0.048342459152142206
          total_loss: 1.2466599265734355
          vf_explained_var: 0.10700546205043793
          vf_loss: 1.20135971042845
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,635,17356.6,635000,-25.19,-19.1,-31.8,251.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-22_00-35-21
  done: false
  episode_len_mean: 250.38
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -25.038000000000082
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1992
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.940286711858963e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3726720412572225
          entropy_coeff: 0.009999999999999998
          kl: 0.010949150060295034
          policy_loss: 0.031460348268349966
          total_loss: 1.1193222496244641
          vf_explained_var: 0.21531854569911957
          vf_loss: 1.0915875395139059
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,636,17385.5,636000,-25.038,-19.1,-31.4,250.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-22_00-35-48
  done: false
  episode_len_mean: 249.32
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.932000000000084
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 1996
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.940286711858963e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3936050537559721
          entropy_coeff: 0.009999999999999998
          kl: 0.01302073074814953
          policy_loss: 0.009785625669691298
          total_loss: 1.172748119301266
          vf_explained_var: 0.290927916765213
          vf_loss: 1.1668972386254204
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,637,17412.8,637000,-24.932,-19.1,-28.6,249.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-22_00-36-19
  done: false
  episode_len_mean: 248.49
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.84900000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2000
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.940286711858963e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.33665310939153037
          entropy_coeff: 0.009999999999999998
          kl: 0.019276490013645228
          policy_loss: 0.012127760135465198
          total_loss: 1.1374515056610108
          vf_explained_var: 0.23592591285705566
          vf_loss: 1.1286883758174049
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,638,17444,638000,-24.849,-19.1,-28.6,248.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-22_00-36-49
  done: false
  episode_len_mean: 248.41
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.841000000000086
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2004
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.940286711858963e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4510525397128529
          entropy_coeff: 0.009999999999999998
          kl: 0.09984588546187025
          policy_loss: 0.008870275815327962
          total_loss: 0.9854962786038717
          vf_explained_var: 0.35982948541641235
          vf_loss: 0.9811266011661953
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,639,17473.8,639000,-24.841,-19.1,-28.6,248.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-22_00-37-19
  done: false
  episode_len_mean: 248.2
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.820000000000082
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2008
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00014910430067788445
          cur_lr: 5.000000000000001e-05
          entropy: 0.1937956283489863
          entropy_coeff: 0.009999999999999998
          kl: 0.005414624079268649
          policy_loss: 0.007024018797609541
          total_loss: 0.884013173977534
          vf_explained_var: 0.42209717631340027
          vf_loss: 0.8789263069629669
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,640,17503.6,640000,-24.82,-19.1,-28.6,248.2




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-22_00-38-08
  done: false
  episode_len_mean: 247.39
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.739000000000082
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 5
  episodes_total: 2013
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00014910430067788445
          cur_lr: 5.000000000000001e-05
          entropy: 0.2778447492255105
          entropy_coeff: 0.009999999999999998
          kl: 0.07737374557017694
          policy_loss: -0.01452638775938087
          total_loss: 1.221928596496582
          vf_explained_var: 0.3790586590766907
          vf_loss: 1.2392218867937723
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,641,17553,641000,-24.739,-19.1,-28.6,247.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-22_00-38-36
  done: false
  episode_len_mean: 247.26
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.726000000000077
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 3
  episodes_total: 2016
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00022365645101682675
          cur_lr: 5.000000000000001e-05
          entropy: 0.48521144224537743
          entropy_coeff: 0.009999999999999998
          kl: 0.030965046870114556
          policy_loss: -0.09776673995786243
          total_loss: 0.7988443083233303
          vf_explained_var: 0.47175347805023193
          vf_loss: 0.9014562421374851
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,642,17580.6,642000,-24.726,-19.1,-28.6,247.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-22_00-39-02
  done: false
  episode_len_mean: 248.37
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.83700000000008
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2020
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00033548467652523995
          cur_lr: 5.000000000000001e-05
          entropy: 0.3848447256618076
          entropy_coeff: 0.009999999999999998
          kl: 0.023264730178479926
          policy_loss: -0.014253301090664333
          total_loss: 1.0495319386323294
          vf_explained_var: 0.3234773278236389
          vf_loss: 1.0676258789168465
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,643,17606.5,643000,-24.837,-19.1,-28.6,248.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-22_00-39-30
  done: false
  episode_len_mean: 248.58
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.85800000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2024
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005032270147878601
          cur_lr: 5.000000000000001e-05
          entropy: 0.43512752254803977
          entropy_coeff: 0.009999999999999998
          kl: 0.005285161776464835
          policy_loss: -0.00872042141854763
          total_loss: 1.1769644260406493
          vf_explained_var: 0.3069775402545929
          vf_loss: 1.1900334662861294
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,644,17634.5,644000,-24.858,-19.1,-28.6,248.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-22_00-39-58
  done: false
  episode_len_mean: 248.44
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.844000000000083
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2028
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005032270147878601
          cur_lr: 5.000000000000001e-05
          entropy: 0.4026869455973307
          entropy_coeff: 0.009999999999999998
          kl: 0.016741918251099087
          policy_loss: 0.01273340442114406
          total_loss: 1.164527580473158
          vf_explained_var: 0.305908739566803
          vf_loss: 1.155812609857983
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,645,17662.4,645000,-24.844,-19.1,-28.6,248.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-22_00-40-26
  done: false
  episode_len_mean: 248.6
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.860000000000085
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2032
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005032270147878601
          cur_lr: 5.000000000000001e-05
          entropy: 0.36776909679174424
          entropy_coeff: 0.009999999999999998
          kl: 0.0699229995921371
          policy_loss: 0.02578395066989793
          total_loss: 1.0563267555501725
          vf_explained_var: 0.37824559211730957
          vf_loss: 1.0341853062311808
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,646,17690.3,646000,-24.86,-19.1,-28.6,248.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-22_00-40-55
  done: false
  episode_len_mean: 248.72
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.872000000000085
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2036
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007548405221817902
          cur_lr: 5.000000000000001e-05
          entropy: 0.19565001808934743
          entropy_coeff: 0.009999999999999998
          kl: 0.002893354814333272
          policy_loss: 0.02971674609515402
          total_loss: 1.0818486551443736
          vf_explained_var: 0.31995490193367004
          vf_loss: 1.0540862242380777
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,647,17719.5,647000,-24.872,-19.1,-28.6,248.72




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-22_00-41-41
  done: false
  episode_len_mean: 248.39
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.839000000000084
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2040
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003774202610908951
          cur_lr: 5.000000000000001e-05
          entropy: 0.2593773396478759
          entropy_coeff: 0.009999999999999998
          kl: 0.01003456569620877
          policy_loss: 0.026922628780206046
          total_loss: 1.0659977343347338
          vf_explained_var: 0.25227293372154236
          vf_loss: 1.041665091779497
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,648,17765.5,648000,-24.839,-19.1,-28.6,248.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-22_00-42-13
  done: false
  episode_len_mean: 248.37
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.83700000000008
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2044
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003774202610908951
          cur_lr: 5.000000000000001e-05
          entropy: 0.2812020738919576
          entropy_coeff: 0.009999999999999998
          kl: 0.015126526382177256
          policy_loss: 0.0027864160223139657
          total_loss: 1.0034343832068973
          vf_explained_var: 0.36351051926612854
          vf_loss: 1.0034542792373233
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,649,17797.3,649000,-24.837,-19.1,-28.6,248.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-22_00-42-42
  done: false
  episode_len_mean: 248.45
  episode_media: {}
  episode_reward_max: -19.1
  episode_reward_mean: -24.84500000000008
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2048
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003774202610908951
          cur_lr: 5.000000000000001e-05
          entropy: 0.2269580685430103
          entropy_coeff: 0.009999999999999998
          kl: 0.0033316835357658824
          policy_loss: 0.010557605905665292
          total_loss: 1.1597166511747572
          vf_explained_var: 0.22410835325717926
          vf_loss: 1.151427368985282
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,650,17826.8,650000,-24.845,-19.1,-28.6,248.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-22_00-43-10
  done: false
  episode_len_mean: 249.48
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.94800000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2052
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018871013054544755
          cur_lr: 5.000000000000001e-05
          entropy: 0.1775827573405372
          entropy_coeff: 0.009999999999999998
          kl: 0.0032072644137192204
          policy_loss: 0.027820678303639094
          total_loss: 1.0522833552625444
          vf_explained_var: 0.31779009103775024
          vf_loss: 1.0262379030386606
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,651,17855,651000,-24.948,-21.6,-28.6,249.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-22_00-43-38
  done: false
  episode_len_mean: 249.65
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.965000000000085
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2056
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.435506527272378e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.1952252238161034
          entropy_coeff: 0.009999999999999998
          kl: 0.00875824312868632
          policy_loss: 0.011733826829327478
          total_loss: 1.1398854626549615
          vf_explained_var: 0.26450785994529724
          vf_loss: 1.1301030470265283
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,652,17883.1,652000,-24.965,-21.6,-28.6,249.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-22_00-44-06
  done: false
  episode_len_mean: 251.13
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.11300000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2060
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.435506527272378e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.16070480744043986
          entropy_coeff: 0.009999999999999998
          kl: 0.009590361336614232
          policy_loss: 0.01900091158847014
          total_loss: 1.2148074150085448
          vf_explained_var: 0.300929993391037
          vf_loss: 1.1974126524395412
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,653,17910.9,653000,-25.113,-21.6,-28.6,251.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-22_00-44-36
  done: false
  episode_len_mean: 250.95
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.095000000000088
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2064
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.435506527272378e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.17238279614183638
          entropy_coeff: 0.009999999999999998
          kl: 0.006526920724723892
          policy_loss: 0.033415044844150546
          total_loss: 1.2216295798619587
          vf_explained_var: 0.22996710240840912
          vf_loss: 1.189937742551168
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,654,17940.6,654000,-25.095,-21.6,-28.6,250.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-22_00-45-07
  done: false
  episode_len_mean: 250.14
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.01400000000008
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2068
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.435506527272378e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.27573205828666686
          entropy_coeff: 0.009999999999999998
          kl: 0.07429561227368835
          policy_loss: 0.02009521300594012
          total_loss: 1.2562423600090875
          vf_explained_var: 0.2991342842578888
          vf_loss: 1.2388974613613553
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,655,17972,655000,-25.014,-21.6,-28.6,250.14




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-22_00-45-51
  done: false
  episode_len_mean: 250.78
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.078000000000085
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2072
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00014153259790908565
          cur_lr: 5.000000000000001e-05
          entropy: 0.13270859950118594
          entropy_coeff: 0.009999999999999998
          kl: 0.002343380837862775
          policy_loss: 0.028543735792239507
          total_loss: 1.2419321166144477
          vf_explained_var: 0.3734254539012909
          vf_loss: 1.2147151218520271
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,656,18015.9,656000,-25.078,-21.6,-30.3,250.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-22_00-46-21
  done: false
  episode_len_mean: 249.99
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.999000000000088
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2076
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.076629895454282e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.28093216700686346
          entropy_coeff: 0.009999999999999998
          kl: 0.06901596943495328
          policy_loss: 0.035593660672505696
          total_loss: 1.0876893334918551
          vf_explained_var: 0.3492163419723511
          vf_loss: 1.054900093873342
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 657000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,657,18046,657000,-24.999,-21.6,-30.3,249.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-22_00-46-52
  done: false
  episode_len_mean: 249.91
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.99100000000009
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2080
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010614944843181427
          cur_lr: 5.000000000000001e-05
          entropy: 0.39557375344965195
          entropy_coeff: 0.009999999999999998
          kl: 0.01025930104320499
          policy_loss: 0.019404427458842594
          total_loss: 1.1492364817195468
          vf_explained_var: 0.08947798609733582
          vf_loss: 1.133786686261495
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,658,18076.6,658000,-24.991,-21.6,-30.3,249.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-22_00-47-21
  done: false
  episode_len_mean: 249.24
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.924000000000078
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2084
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010614944843181427
          cur_lr: 5.000000000000001e-05
          entropy: 0.5103107000390689
          entropy_coeff: 0.009999999999999998
          kl: 0.032228907797022546
          policy_loss: 0.030405892638696565
          total_loss: 0.9401833133565055
          vf_explained_var: 0.06870969384908676
          vf_loss: 0.914877112209797
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,659,18105.3,659000,-24.924,-21.6,-30.3,249.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-22_00-47-50
  done: false
  episode_len_mean: 248.68
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.868000000000084
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2088
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015922417264772136
          cur_lr: 5.000000000000001e-05
          entropy: 0.40356721679369606
          entropy_coeff: 0.009999999999999998
          kl: 0.010384412738883162
          policy_loss: -0.11399368602368566
          total_loss: 1.450940970579783
          vf_explained_var: 0.07916446030139923
          vf_loss: 1.568968669573466
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,660,18134.8,660000,-24.868,-21.6,-30.3,248.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-22_00-48-14
  done: false
  episode_len_mean: 250.46
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.046000000000085
  episode_reward_min: -30.600000000000165
  episodes_this_iter: 4
  episodes_total: 2092
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015922417264772136
          cur_lr: 5.000000000000001e-05
          entropy: 0.6030200004577637
          entropy_coeff: 0.009999999999999998
          kl: 0.12562651223933285
          policy_loss: 0.04425541907548904
          total_loss: 1.1608945813443925
          vf_explained_var: 0.16946831345558167
          vf_loss: 1.122649339834849
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,661,18158.8,661000,-25.046,-21.6,-30.6,250.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-22_00-48-38
  done: false
  episode_len_mean: 251.88
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.18800000000009
  episode_reward_min: -31.900000000000183
  episodes_this_iter: 3
  episodes_total: 2095
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023883625897158208
          cur_lr: 5.000000000000001e-05
          entropy: 0.6899445202615526
          entropy_coeff: 0.009999999999999998
          kl: 0.030288419364261028
          policy_loss: 0.08214798230263921
          total_loss: 0.7048774003982544
          vf_explained_var: 0.13466675579547882
          vf_loss: 0.6296216269334157
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 662000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,662,18182.1,662000,-25.188,-21.6,-31.9,251.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-22_00-49-00
  done: false
  episode_len_mean: 253.71
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.37100000000009
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2098
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003582543884573731
          cur_lr: 5.000000000000001e-05
          entropy: 0.780079714457194
          entropy_coeff: 0.009999999999999998
          kl: 0.021823572138632445
          policy_loss: -0.09994535748329428
          total_loss: 1.4010157942771913
          vf_explained_var: 0.07607408612966537
          vf_loss: 1.5087541235817803
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,663,18204.6,663000,-25.371,-21.6,-33.6,253.71




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-22_00-49-47
  done: false
  episode_len_mean: 254.77
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.4770000000001
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2102
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005373815826860595
          cur_lr: 5.000000000000001e-05
          entropy: 0.5192846798234516
          entropy_coeff: 0.009999999999999998
          kl: 0.018459001754485546
          policy_loss: 0.0015313910941282909
          total_loss: 1.3627739363246494
          vf_explained_var: 0.19763611257076263
          vf_loss: 1.3664254877302382
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,664,18251.1,664000,-25.477,-21.6,-33.6,254.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-22_00-50-15
  done: false
  episode_len_mean: 254.78
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.478000000000097
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2106
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005373815826860595
          cur_lr: 5.000000000000001e-05
          entropy: 0.5226930035485162
          entropy_coeff: 0.009999999999999998
          kl: 0.0018822907254300641
          policy_loss: -0.029286315333512093
          total_loss: 1.5478176474571228
          vf_explained_var: 0.04490739107131958
          vf_loss: 1.582329883840349
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,665,18279.9,665000,-25.478,-21.6,-33.6,254.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-22_00-50-41
  done: false
  episode_len_mean: 256.25
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.625000000000096
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2110
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.7426436715655856
          entropy_coeff: 0.009999999999999998
          kl: 0.012189831395887236
          policy_loss: 0.02725552792350451
          total_loss: 1.3907796449131435
          vf_explained_var: 0.17964428663253784
          vf_loss: 1.370947253704071
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,666,18305.4,666000,-25.625,-21.8,-33.6,256.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-22_00-51-07
  done: false
  episode_len_mean: 257.13
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.71300000000009
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2114
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.5357289049360487
          entropy_coeff: 0.009999999999999998
          kl: 0.007311177752123794
          policy_loss: 0.017357568360037275
          total_loss: 1.455953069527944
          vf_explained_var: 0.18206924200057983
          vf_loss: 1.443950817320082
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,667,18331.7,667000,-25.713,-21.9,-33.6,257.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-22_00-51-33
  done: false
  episode_len_mean: 257.87
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.787000000000095
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2117
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.6412271334065331
          entropy_coeff: 0.009999999999999998
          kl: 0.006995880819854645
          policy_loss: -0.051472328437699215
          total_loss: 1.1870942605866326
          vf_explained_var: 0.00586926331743598
          vf_loss: 1.2449769801563686
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 668000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,668,18357.1,668000,-25.787,-21.9,-33.6,257.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-22_00-51-57
  done: false
  episode_len_mean: 258.98
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.898000000000103
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2121
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.58878299858835
          entropy_coeff: 0.009999999999999998
          kl: 0.010439349867941575
          policy_loss: 0.006528603244158957
          total_loss: 1.7627276990148757
          vf_explained_var: 0.016643159091472626
          vf_loss: 1.7620841145515442
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,669,18381.4,669000,-25.898,-21.9,-33.6,258.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-22_00-52-22
  done: false
  episode_len_mean: 259.82
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.9820000000001
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2124
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.5414412875970205
          entropy_coeff: 0.009999999999999998
          kl: 0.007745390244415597
          policy_loss: 0.023111117879549663
          total_loss: 1.2212773574723137
          vf_explained_var: 0.009493453428149223
          vf_loss: 1.2035785694917043
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,670,18406.4,670000,-25.982,-21.9,-33.6,259.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-22_00-52-49
  done: false
  episode_len_mean: 260.13
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.013000000000098
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2128
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026869079134302976
          cur_lr: 5.000000000000001e-05
          entropy: 0.5627081082926856
          entropy_coeff: 0.009999999999999998
          kl: 0.0029645337817420614
          policy_loss: 0.02050288129183981
          total_loss: 1.7638734804259406
          vf_explained_var: 0.015242049470543861
          vf_loss: 1.7489968591266207
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 671000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,671,18433.6,671000,-26.013,-21.9,-33.6,260.13




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-22_00-53-35
  done: false
  episode_len_mean: 260.45
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.045000000000105
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2132
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00013434539567151488
          cur_lr: 5.000000000000001e-05
          entropy: 0.5754214187463125
          entropy_coeff: 0.009999999999999998
          kl: 0.0034254956882850567
          policy_loss: 0.01360339406463835
          total_loss: 1.6411111884646945
          vf_explained_var: 0.02350574918091297
          vf_loss: 1.633261541525523
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,672,18479.7,672000,-26.045,-21.9,-33.6,260.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-22_00-54-01
  done: false
  episode_len_mean: 261.56
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.156000000000105
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2135
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.717269783575744e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6132798658476936
          entropy_coeff: 0.009999999999999998
          kl: 0.004373049837425372
          policy_loss: 0.03924017995595932
          total_loss: 0.9686471594704522
          vf_explained_var: 0.009239769540727139
          vf_loss: 0.9355394850174586
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,673,18504.8,673000,-26.156,-21.9,-33.6,261.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-22_00-54-26
  done: false
  episode_len_mean: 262.98
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.2980000000001
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2139
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.358634891787872e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6419247243139479
          entropy_coeff: 0.009999999999999998
          kl: 0.0027522523349650403
          policy_loss: 0.023588605183694097
          total_loss: 1.6629535330666436
          vf_explained_var: 0.017607003450393677
          vf_loss: 1.6457840773794385
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,674,18529.9,674000,-26.298,-21.9,-33.6,262.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-22_00-54-51
  done: false
  episode_len_mean: 264.55
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.45500000000011
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2142
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.679317445893936e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5933809416161643
          entropy_coeff: 0.009999999999999998
          kl: 0.006382605616751233
          policy_loss: 0.02632176296578513
          total_loss: 1.104356727666325
          vf_explained_var: 0.038580916821956635
          vf_loss: 1.083968658083015
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,675,18554.8,675000,-26.455,-22.2,-33.6,264.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-22_00-55-15
  done: false
  episode_len_mean: 265.82
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.582000000000107
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2146
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.679317445893936e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5782442616091834
          entropy_coeff: 0.009999999999999998
          kl: 0.007223610173123992
          policy_loss: 0.017270788136455748
          total_loss: 1.5838084856669108
          vf_explained_var: 0.11752492934465408
          vf_loss: 1.5723200268215602
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,676,18579.5,676000,-26.582,-22.2,-33.6,265.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-22_00-55-42
  done: false
  episode_len_mean: 266.22
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.622000000000106
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2150
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.679317445893936e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5246996406051848
          entropy_coeff: 0.009999999999999998
          kl: 0.003961439467471766
          policy_loss: 0.0322466644561953
          total_loss: 1.4888052529758877
          vf_explained_var: 0.013315288349986076
          vf_loss: 1.4618055211173164
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,677,18606,677000,-26.622,-22.2,-33.6,266.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-22_00-56-10
  done: false
  episode_len_mean: 266.45
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.64500000000011
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 2153
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.39658722946968e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4940732028749254
          entropy_coeff: 0.009999999999999998
          kl: 0.0016433525247836972
          policy_loss: -0.09868026673793792
          total_loss: 1.6039549125565422
          vf_explained_var: 0.020397229120135307
          vf_loss: 1.7075758894284567
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,678,18634,678000,-26.645,-22.2,-33.6,266.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-22_00-56-37
  done: false
  episode_len_mean: 266.89
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.68900000000011
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2157
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.19829361473484e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4304459273815155
          entropy_coeff: 0.009999999999999998
          kl: 0.012284472257358124
          policy_loss: -0.06464364069203536
          total_loss: 1.4339930163489447
          vf_explained_var: 0.15450944006443024
          vf_loss: 1.5029410574171278
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,679,18661.2,679000,-26.689,-22.2,-33.6,266.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-22_00-57-23
  done: false
  episode_len_mean: 266.43
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.6430000000001
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2161
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.19829361473484e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.41110007067521415
          entropy_coeff: 0.009999999999999998
          kl: 0.01819345117953901
          policy_loss: -0.10614125844505098
          total_loss: 1.7974332835939195
          vf_explained_var: 0.2539602220058441
          vf_loss: 1.907685465282864
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,680,18706.9,680000,-26.643,-22.2,-33.6,266.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-22_00-57-53
  done: false
  episode_len_mean: 266.18
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.618000000000112
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 5
  episodes_total: 2166
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.19829361473484e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.408488847149743
          entropy_coeff: 0.009999999999999998
          kl: 0.0038017788702474162
          policy_loss: -0.009380059813459714
          total_loss: 2.0402107821570503
          vf_explained_var: 0.027635982260107994
          vf_loss: 2.0536756926112707
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,681,18737.1,681000,-26.618,-22.2,-33.6,266.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-22_00-58-21
  done: false
  episode_len_mean: 266.17
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.617000000000107
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2170
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.09914680736742e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.41204293999407027
          entropy_coeff: 0.009999999999999998
          kl: 0.004817710069746382
          policy_loss: 0.03116007861163881
          total_loss: 1.2643268207708995
          vf_explained_var: 0.2038402557373047
          vf_loss: 1.2372871607542038
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,682,18765.4,682000,-26.617,-22.2,-33.6,266.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-22_00-58-52
  done: false
  episode_len_mean: 264.67
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.46700000000011
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2174
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3382245474391513
          entropy_coeff: 0.009999999999999998
          kl: 0.0015594614473052894
          policy_loss: 0.027447822938362756
          total_loss: 1.6412194304996066
          vf_explained_var: 0.013538564555346966
          vf_loss: 1.6171538366211786
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,683,18795.9,683000,-26.467,-22.2,-33.6,264.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-22_00-59-22
  done: false
  episode_len_mean: 264.21
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.421000000000102
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2178
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.24786701841855e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3536457313431634
          entropy_coeff: 0.009999999999999998
          kl: 0.0008599996565673488
          policy_loss: -0.027816128068500093
          total_loss: 1.607129528787401
          vf_explained_var: 0.018944663926959038
          vf_loss: 1.6384821083810595
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,684,18825.8,684000,-26.421,-22.2,-33.6,264.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-22_00-59-52
  done: false
  episode_len_mean: 263.85
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.385000000000105
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 5
  episodes_total: 2183
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.623933509209275e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3156747258371777
          entropy_coeff: 0.009999999999999998
          kl: 0.001975347205756053
          policy_loss: -0.02072018807133039
          total_loss: 2.0031904061635335
          vf_explained_var: 0.03405192121863365
          vf_loss: 2.0270673513412474
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,685,18856,685000,-26.385,-22.2,-33.6,263.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-22_01-00-22
  done: false
  episode_len_mean: 264.03
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -26.403000000000105
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2187
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3119667546046375e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.2865450279580222
          entropy_coeff: 0.009999999999999998
          kl: 0.0010684370054317722
          policy_loss: 0.029343239549133513
          total_loss: 1.5043174690670438
          vf_explained_var: 0.025836128741502762
          vf_loss: 1.4778397003809611
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 686000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,686,18886.1,686000,-26.403,-22.6,-33.6,264.03




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-22_01-01-13
  done: false
  episode_len_mean: 261.91
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -26.19100000000011
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 4
  episodes_total: 2191
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.559833773023188e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.24071501145760218
          entropy_coeff: 0.009999999999999998
          kl: 0.0017255572566890957
          policy_loss: -0.022590139425463146
          total_loss: 1.51496571832233
          vf_explained_var: 0.02428619936108589
          vf_loss: 1.539963005648719
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,687,18936.7,687000,-26.191,-19.8,-33.6,261.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-22_01-01-43
  done: false
  episode_len_mean: 257.97
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -25.797000000000097
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 5
  episodes_total: 2196
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.279916886511594e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.20836859908368852
          entropy_coeff: 0.009999999999999998
          kl: 0.0008805439569048051
          policy_loss: -0.0031535151931974623
          total_loss: 1.9252218855751886
          vf_explained_var: 0.03342773765325546
          vf_loss: 1.9304591046439277
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 68800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,688,18967.1,688000,-25.797,-19.8,-33.6,257.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-22_01-02-17
  done: false
  episode_len_mean: 254.92
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -25.492000000000093
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 2200
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.639958443255797e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.1808914519018597
          entropy_coeff: 0.009999999999999998
          kl: 0.0009460386708002015
          policy_loss: -0.07059828746649954
          total_loss: 1.4227972984313966
          vf_explained_var: 0.02133346162736416
          vf_loss: 1.4952044990327624
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,689,19001.1,689000,-25.492,-19.8,-31.5,254.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-22_01-02-48
  done: false
  episode_len_mean: 253.35
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -25.33500000000009
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 2205
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.199792216278985e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.17541331466701296
          entropy_coeff: 0.009999999999999998
          kl: 0.007339359229170444
          policy_loss: -0.0007647976279258728
          total_loss: 1.9275097164842818
          vf_explained_var: 0.1559833586215973
          vf_loss: 1.930028654469384
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,690,19031.9,690000,-25.335,-19.8,-31.5,253.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-22_01-03-21
  done: false
  episode_len_mean: 250.92
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -25.09200000000008
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 5
  episodes_total: 2210
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.199792216278985e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.10744028174214892
          entropy_coeff: 0.009999999999999998
          kl: 0.0002379366397926377
          policy_loss: 0.04013848859402869
          total_loss: 1.565830855899387
          vf_explained_var: 0.03151979297399521
          vf_loss: 1.5267667717403837
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,691,19064.4,691000,-25.092,-19.8,-30.8,250.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-22_01-03-52
  done: false
  episode_len_mean: 248.97
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -24.897000000000084
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 4
  episodes_total: 2214
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.099896108139492e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.14412489897674985
          entropy_coeff: 0.009999999999999998
          kl: 0.000776947597030168
          policy_loss: 0.035544493711656994
          total_loss: 1.3065319783157772
          vf_explained_var: 0.16969548165798187
          vf_loss: 1.2724287317858802
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,692,19096,692000,-24.897,-19.8,-30.8,248.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-22_01-04-24
  done: false
  episode_len_mean: 245.66
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -24.56600000000008
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 5
  episodes_total: 2219
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.049948054069746e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.13081975041164293
          entropy_coeff: 0.009999999999999998
          kl: 0.00021254173266823292
          policy_loss: -0.01624579077793492
          total_loss: 1.856592947906918
          vf_explained_var: 0.0559270866215229
          vf_loss: 1.8741469687885708
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,693,19127.4,693000,-24.566,-19.8,-30.8,245.66




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-22_01-05-14
  done: false
  episode_len_mean: 242.65
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.26500000000008
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 4
  episodes_total: 2223
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.024974027034873e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.11511956693397628
          entropy_coeff: 0.009999999999999998
          kl: 0.00012242192983173477
          policy_loss: -0.034871943005257186
          total_loss: 1.3682015419006348
          vf_explained_var: 0.034600429236888885
          vf_loss: 1.4042246964242724
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,694,19177.7,694000,-24.265,-18.9,-30.8,242.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-22_01-05-45
  done: false
  episode_len_mean: 240.17
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.01700000000007
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 5
  episodes_total: 2228
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.124870135174365e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.12979749482538966
          entropy_coeff: 0.009999999999999998
          kl: 0.0006368842261473952
          policy_loss: -0.01122692765461074
          total_loss: 1.7843718277083502
          vf_explained_var: 0.05865132063627243
          vf_loss: 1.7968966960906982
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,695,19208.8,695000,-24.017,-18.9,-30.8,240.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-22_01-06-17
  done: false
  episode_len_mean: 238.63
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.86300000000007
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 4
  episodes_total: 2232
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5624350675871827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.1428420849972301
          entropy_coeff: 0.009999999999999998
          kl: 0.0007202632816348537
          policy_loss: -0.04123276091284222
          total_loss: 1.3006376451916164
          vf_explained_var: 0.04158547893166542
          vf_loss: 1.343298820654551
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 696000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,696,19241.1,696000,-23.863,-18.9,-30.8,238.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-22_01-06-49
  done: false
  episode_len_mean: 235.25
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.525000000000063
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 5
  episodes_total: 2237
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2812175337935913e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.11591106495923466
          entropy_coeff: 0.009999999999999998
          kl: 0.00246225599181192
          policy_loss: 0.025322226931651433
          total_loss: 1.2946381158298916
          vf_explained_var: 0.1536569893360138
          vf_loss: 1.2704749862353006
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,697,19272.6,697000,-23.525,-18.9,-30.8,235.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-22_01-07-20
  done: false
  episode_len_mean: 231.82
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.182000000000052
  episode_reward_min: -30.800000000000168
  episodes_this_iter: 5
  episodes_total: 2242
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.406087668967957e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.15319613963365555
          entropy_coeff: 0.009999999999999998
          kl: 0.0005221810028167006
          policy_loss: -0.012093441602256562
          total_loss: 1.8172657171885171
          vf_explained_var: 0.040484365075826645
          vf_loss: 1.8308911230829028
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,698,19304.1,698000,-23.182,-18.9,-30.8,231.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-22_01-07-52
  done: false
  episode_len_mean: 229.24
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.924000000000056
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 2246
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.203043834483978e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.16364975902769302
          entropy_coeff: 0.009999999999999998
          kl: 0.0009158852354025508
          policy_loss: 0.030504643875691625
          total_loss: 1.221301234430737
          vf_explained_var: 0.1525411754846573
          vf_loss: 1.1924330936537848
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,699,19336.2,699000,-22.924,-18.9,-27.2,229.24




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-22_01-08-43
  done: false
  episode_len_mean: 226.8
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.680000000000046
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 5
  episodes_total: 2251
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.601521917241989e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.17731358077791
          entropy_coeff: 0.009999999999999998
          kl: 0.0020285849685114403
          policy_loss: 0.013294338021013472
          total_loss: 1.30634879204962
          vf_explained_var: 0.17246046662330627
          vf_loss: 1.294827593035168
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,700,19386.5,700000,-22.68,-18.9,-26.5,226.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-22_01-09-18
  done: false
  episode_len_mean: 225.52
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.55200000000005
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 4
  episodes_total: 2255
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.007609586209946e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.24829824964205424
          entropy_coeff: 0.009999999999999998
          kl: 0.0032449293421633386
          policy_loss: 0.023878701693481868
          total_loss: 0.5505579658680492
          vf_explained_var: 0.7186753153800964
          vf_loss: 0.529162248969078
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 701000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,701,19421.2,701000,-22.552,-18.9,-26.5,225.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-22_01-09-49
  done: false
  episode_len_mean: 224.27
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.42700000000005
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 5
  episodes_total: 2260
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.003804793104973e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.3551631957292557
          entropy_coeff: 0.009999999999999998
          kl: 0.0062770426771124556
          policy_loss: -0.028835587865776485
          total_loss: 0.8879254817962646
          vf_explained_var: 0.6396604180335999
          vf_loss: 0.920312700006697
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,702,19452.5,702000,-22.427,-18.9,-26.5,224.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-22_01-10-20
  done: false
  episode_len_mean: 223.84
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.384000000000047
  episode_reward_min: -24.70000000000008
  episodes_this_iter: 4
  episodes_total: 2264
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.003804793104973e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6787104179461797
          entropy_coeff: 0.009999999999999998
          kl: 0.05915806228012741
          policy_loss: 0.05530814826488495
          total_loss: 0.6852123306029373
          vf_explained_var: 0.5091776251792908
          vf_loss: 0.636691292292542
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,703,19483.5,703000,-22.384,-18.9,-24.7,223.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-22_01-10-40
  done: false
  episode_len_mean: 227.32
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.732000000000053
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2267
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.005707189657459e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.2347782770792644
          entropy_coeff: 0.009999999999999998
          kl: 0.05609190106168001
          policy_loss: 0.07609174880716536
          total_loss: 0.689136611421903
          vf_explained_var: 0.17778345942497253
          vf_loss: 0.6253926532963912
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,704,19503.4,704000,-22.732,-18.9,-41.5,227.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-22_01-11-01
  done: false
  episode_len_mean: 229.6
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.960000000000058
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 2
  episodes_total: 2269
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.008560784486187e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.311979157394833
          entropy_coeff: 0.009999999999999998
          kl: 0.012223089314406839
          policy_loss: -0.05153852800528209
          total_loss: 0.545309448407756
          vf_explained_var: 0.5172954201698303
          vf_loss: 0.6099677701791127
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,705,19524.5,705000,-22.96,-18.9,-41.5,229.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-22_01-11-27
  done: false
  episode_len_mean: 231.62
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.162000000000063
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 2273
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.008560784486187e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9269795444276597
          entropy_coeff: 0.009999999999999998
          kl: 0.009751123500803753
          policy_loss: -0.003591572576098972
          total_loss: 0.3700233139925533
          vf_explained_var: 0.8436161279678345
          vf_loss: 0.3828846795691384
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,706,19550.1,706000,-23.162,-18.9,-41.5,231.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-22_01-11-56
  done: false
  episode_len_mean: 232.9
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.290000000000063
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 2277
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.008560784486187e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7557422485616472
          entropy_coeff: 0.009999999999999998
          kl: 0.021236132793487093
          policy_loss: -0.0021184570259518095
          total_loss: 0.4547512369023429
          vf_explained_var: 0.8539106845855713
          vf_loss: 0.46442711899677913
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,707,19580,707000,-23.29,-18.9,-41.5,232.9




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-22_01-12-44
  done: false
  episode_len_mean: 233.05
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.30500000000006
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 2281
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3512841176729277e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7622084528207779
          entropy_coeff: 0.009999999999999998
          kl: 0.0615778591378559
          policy_loss: 0.009048233553767205
          total_loss: 0.3225612165199386
          vf_explained_var: 0.9318224787712097
          vf_loss: 0.3211350633038415
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 708000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,708,19627.9,708000,-23.305,-18.9,-41.5,233.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-22_01-13-08
  done: false
  episode_len_mean: 235.4
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.54000000000006
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2284
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.026926176509392e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.161734747224384
          entropy_coeff: 0.009999999999999998
          kl: 0.04635924128499798
          policy_loss: 0.012527016053597132
          total_loss: 0.34750254907541805
          vf_explained_var: 0.9069541096687317
          vf_loss: 0.3465928789642122
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,709,19651.2,709000,-23.54,-18.9,-41.5,235.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-22_01-13-31
  done: false
  episode_len_mean: 238.36
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.836000000000066
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2287
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.040389264764088e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3149844050407409
          entropy_coeff: 0.009999999999999998
          kl: 0.059963075367300354
          policy_loss: -0.10454499630464448
          total_loss: 0.22786930054426194
          vf_explained_var: 0.8250634074211121
          vf_loss: 0.3455641440219349
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 710000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,710,19674.1,710000,-23.836,-18.9,-41.5,238.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-22_01-13-56
  done: false
  episode_len_mean: 241.41
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.141000000000076
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2290
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.560583897146133e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2245821581946479
          entropy_coeff: 0.009999999999999998
          kl: 0.04161537375588315
          policy_loss: -0.1525728940963745
          total_loss: 0.46377889811992645
          vf_explained_var: 0.585986852645874
          vf_loss: 0.6285976032416026
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,711,19699.1,711000,-24.141,-18.9,-41.5,241.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-22_01-14-19
  done: false
  episode_len_mean: 245.26
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.526000000000078
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 2294
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.840875845719198e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2812445786264208
          entropy_coeff: 0.009999999999999998
          kl: 0.04734969860512292
          policy_loss: 0.009432520117196771
          total_loss: 0.6123345136642456
          vf_explained_var: 0.5252467393875122
          vf_loss: 0.6157144367694855
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 712000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,712,19722.4,712000,-24.526,-18.9,-41.5,245.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-22_01-14-41
  done: false
  episode_len_mean: 248.4
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.840000000000085
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2297
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0261313768578797e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3134631699985928
          entropy_coeff: 0.009999999999999998
          kl: 0.08001769513928564
          policy_loss: 0.06201422106888559
          total_loss: 0.6489842245976131
          vf_explained_var: 0.41621696949005127
          vf_loss: 0.6001046424110731
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,713,19744.8,713000,-24.84,-18.9,-41.5,248.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-22_01-15-07
  done: false
  episode_len_mean: 250.33
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -25.033000000000087
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2300
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5391970652868196e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2786539408895705
          entropy_coeff: 0.009999999999999998
          kl: 0.1936809941885575
          policy_loss: 0.09818786059816678
          total_loss: 0.777086591720581
          vf_explained_var: 0.1151934415102005
          vf_loss: 0.6916852704352803
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,714,19770,714000,-25.033,-18.9,-41.5,250.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-22_01-15-29
  done: false
  episode_len_mean: 253.29
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -25.329000000000093
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2303
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3087955979302293e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4166370683246188
          entropy_coeff: 0.009999999999999998
          kl: 0.03469658646435451
          policy_loss: 0.027196611132886676
          total_loss: 0.8178553587860531
          vf_explained_var: 0.22315406799316406
          vf_loss: 0.804825116528405
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,715,19792.7,715000,-25.329,-18.9,-41.5,253.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-22_01-15-52
  done: false
  episode_len_mean: 256.68
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -25.66800000000009
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2306
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.463193396895344e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4051265981462266
          entropy_coeff: 0.009999999999999998
          kl: 0.026931188728634304
          policy_loss: 0.02184779635734028
          total_loss: 0.9198913908667035
          vf_explained_var: -0.1833643764257431
          vf_loss: 0.9120948650770717
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,716,19815,716000,-25.668,-18.9,-41.5,256.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-22_01-16-13
  done: false
  episode_len_mean: 260.5
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -26.0500000000001
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2309
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.194790095343017e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.363401597075992
          entropy_coeff: 0.009999999999999998
          kl: 0.04291797415642612
          policy_loss: 0.03870496104160945
          total_loss: 0.9804139355818431
          vf_explained_var: -0.012072560377418995
          vf_loss: 0.9553429813848602
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,717,19836,717000,-26.05,-18.9,-41.5,260.5




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-22_01-16-54
  done: false
  episode_len_mean: 263.66
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -26.36600000000011
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2312
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.792185143014527e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2775373299916586
          entropy_coeff: 0.009999999999999998
          kl: 0.04631755071597748
          policy_loss: 0.0018318959408336216
          total_loss: 0.9632540427976184
          vf_explained_var: 0.05110248550772667
          vf_loss: 0.9741975242892901
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 718000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,718,19876.9,718000,-26.366,-18.9,-41.5,263.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-22_01-17-18
  done: false
  episode_len_mean: 266.22
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -26.622000000000106
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2315
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.168827771452179e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0929300804932913
          entropy_coeff: 0.009999999999999998
          kl: 0.2649874695287204
          policy_loss: -0.08261944010025925
          total_loss: 1.168332255548901
          vf_explained_var: 0.19648000597953796
          vf_loss: 1.2618809931808048
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,719,19901,719000,-26.622,-18.9,-41.5,266.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-22_01-17-39
  done: false
  episode_len_mean: 270.18
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -27.01800000000012
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2318
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7532416571782681e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9741484893692864
          entropy_coeff: 0.009999999999999998
          kl: 0.02001384913896342
          policy_loss: -0.0842208637131585
          total_loss: 0.8893167763948441
          vf_explained_var: 0.4603355824947357
          vf_loss: 0.9832791282071007
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,720,19922.2,720000,-27.018,-18.9,-41.5,270.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-22_01-18-00
  done: false
  episode_len_mean: 274.14
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.41400000000012
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 2321
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.629862485767403e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.837096893787384
          entropy_coeff: 0.009999999999999998
          kl: 0.02210724617618184
          policy_loss: 0.033095911600523525
          total_loss: 0.6134900450706482
          vf_explained_var: 0.7274964451789856
          vf_loss: 0.5887651070952415
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,721,19943.2,721000,-27.414,-19.2,-41.5,274.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-22_01-18-24
  done: false
  episode_len_mean: 279.31
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.931000000000125
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2324
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.944793728651103e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8603955851660834
          entropy_coeff: 0.009999999999999998
          kl: 0.00792887463705859
          policy_loss: 0.025112263444397185
          total_loss: 0.7924718002478282
          vf_explained_var: 0.10316931456327438
          vf_loss: 0.7759634889662266
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,722,19967.2,722000,-27.931,-19.2,-42.4,279.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-22_01-18-43
  done: false
  episode_len_mean: 282.45
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -28.245000000000132
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 2
  episodes_total: 2326
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.944793728651103e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8751040074560378
          entropy_coeff: 0.009999999999999998
          kl: 0.011251123283439234
          policy_loss: -0.07747655312220256
          total_loss: 0.6244871005415916
          vf_explained_var: 0.6999946236610413
          vf_loss: 0.7107146950231658
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,723,19986.5,723000,-28.245,-19.2,-42.4,282.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-22_01-19-05
  done: false
  episode_len_mean: 286.54
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -28.65400000000013
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2329
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.944793728651103e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9625094168716006
          entropy_coeff: 0.009999999999999998
          kl: 0.013008828074665817
          policy_loss: -0.035098679529296024
          total_loss: 0.43695844411849977
          vf_explained_var: 0.8882400393486023
          vf_loss: 0.48168221794896654
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,724,20008.4,724000,-28.654,-19.2,-42.4,286.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-22_01-19-25
  done: false
  episode_len_mean: 290.72
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -29.072000000000145
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2332
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.944793728651103e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0330132140053643
          entropy_coeff: 0.009999999999999998
          kl: 0.009425189612771116
          policy_loss: 0.012229550547069973
          total_loss: 0.48013760066694683
          vf_explained_var: 0.8593944907188416
          vf_loss: 0.4782381784584787
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,725,20028.6,725000,-29.072,-19.2,-42.4,290.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-22_01-19-48
  done: false
  episode_len_mean: 293.98
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -29.39800000000015
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2335
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.944793728651103e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0793624248769549
          entropy_coeff: 0.009999999999999998
          kl: 0.028146144682853307
          policy_loss: 0.03845307744211621
          total_loss: 0.5119456604123116
          vf_explained_var: 0.8619736433029175
          vf_loss: 0.48428620224197705
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,726,20051.1,726000,-29.398,-19.2,-42.4,293.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-22_01-20-10
  done: false
  episode_len_mean: 297.52
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -29.75200000000015
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2338
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.917190592976657e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.223788419034746
          entropy_coeff: 0.009999999999999998
          kl: 0.07719106899345718
          policy_loss: 0.09806268910566966
          total_loss: 1.0972572432623968
          vf_explained_var: 0.5968484878540039
          vf_loss: 1.0114324390888214
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,727,20072.8,727000,-29.752,-19.2,-42.4,297.52




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-22_01-20-49
  done: false
  episode_len_mean: 301.21
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -30.12100000000016
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 3
  episodes_total: 2341
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.875785889464984e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.38634997871187
          entropy_coeff: 0.009999999999999998
          kl: 0.01720984947640684
          policy_loss: 0.1262941295074092
          total_loss: 0.7845356467697355
          vf_explained_var: 0.6557227373123169
          vf_loss: 0.672105022892356
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,728,20111.8,728000,-30.121,-19.2,-42.4,301.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-22_01-21-09
  done: false
  episode_len_mean: 304.26
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -30.42600000000017
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 2
  episodes_total: 2343
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.875785889464984e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5085094478395251
          entropy_coeff: 0.009999999999999998
          kl: 0.03308409002671087
          policy_loss: -0.057832066549195184
          total_loss: 0.9840192331208123
          vf_explained_var: 0.3193065822124481
          vf_loss: 1.0569364004664952
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,729,20132.3,729000,-30.426,-19.2,-42.4,304.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-22_01-21-26
  done: false
  episode_len_mean: 310.87
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -31.087000000000174
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 3
  episodes_total: 2346
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3313678834197474e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.584355898698171
          entropy_coeff: 0.009999999999999998
          kl: 0.011657428432489287
          policy_loss: 0.0781608895295196
          total_loss: 0.9732474565505982
          vf_explained_var: 0.2862919569015503
          vf_loss: 0.9109301428414054
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,730,20148.6,730000,-31.087,-19.2,-51.9,310.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-22_01-21-43
  done: false
  episode_len_mean: 314.93
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -31.493000000000176
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2348
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3313678834197474e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.610674382580651
          entropy_coeff: 0.009999999999999998
          kl: 0.019882891011812988
          policy_loss: 0.044638275686237544
          total_loss: 0.7189608848757214
          vf_explained_var: 0.12012510001659393
          vf_loss: 0.6904293500714832
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,731,20166.5,731000,-31.493,-19.2,-51.9,314.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-22_01-22-05
  done: false
  episode_len_mean: 320.65
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -32.06500000000019
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 3
  episodes_total: 2351
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3313678834197474e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5728423953056336
          entropy_coeff: 0.009999999999999998
          kl: 0.029331085561939575
          policy_loss: 0.06538899665077527
          total_loss: 0.9771591362026003
          vf_explained_var: -0.2304237335920334
          vf_loss: 0.9274985672699081
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,732,20187.6,732000,-32.065,-21.7,-51.9,320.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-22_01-22-25
  done: false
  episode_len_mean: 323.6
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -32.36000000000019
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2353
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9970518251296212e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.524410687552558
          entropy_coeff: 0.009999999999999998
          kl: 0.02114967199626309
          policy_loss: -0.07816521839963066
          total_loss: 1.0834717634651396
          vf_explained_var: -0.20950403809547424
          vf_loss: 1.176881071511242
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,733,20207.9,733000,-32.36,-21.7,-51.9,323.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-22_01-22-44
  done: false
  episode_len_mean: 328.73
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -32.8730000000002
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 3
  episodes_total: 2356
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9955777376944305e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5321248875723945
          entropy_coeff: 0.009999999999999998
          kl: 0.018430854271286674
          policy_loss: 0.054622036218643186
          total_loss: 1.25528072781033
          vf_explained_var: 5.5283970141317695e-05
          vf_loss: 1.2159799330764347
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,734,20227,734000,-32.873,-21.7,-51.9,328.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-22_01-23-03
  done: false
  episode_len_mean: 332.07
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -33.20700000000021
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2358
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9955777376944305e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4662046816613938
          entropy_coeff: 0.009999999999999998
          kl: 0.03940956387725818
          policy_loss: -0.07091732256942325
          total_loss: 1.0424967467784882
          vf_explained_var: -0.10623924434185028
          vf_loss: 1.1280761128498449
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,735,20246.3,735000,-33.207,-22,-51.9,332.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-22_01-23-21
  done: false
  episode_len_mean: 336.25
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -33.62500000000021
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2360
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4933666065416484e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2471738470925224
          entropy_coeff: 0.009999999999999998
          kl: 0.05179891571563044
          policy_loss: -0.08434418837229411
          total_loss: 0.9885339962111579
          vf_explained_var: 0.004368231166154146
          vf_loss: 1.0853499285048909
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,736,20263.9,736000,-33.625,-22,-51.9,336.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-22_01-23-37
  done: false
  episode_len_mean: 343.3
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.330000000000226
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 3
  episodes_total: 2363
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.740049909812472e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2424409945805868
          entropy_coeff: 0.009999999999999998
          kl: 0.01594368197387076
          policy_loss: 0.040274067471424736
          total_loss: 1.1847838487890032
          vf_explained_var: -0.07560044527053833
          vf_loss: 1.156934196750323
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,737,20280.2,737000,-34.33,-22,-51.9,343.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-22_01-23-55
  done: false
  episode_len_mean: 346.42
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.64200000000022
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2365
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.740049909812472e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.209029127491845
          entropy_coeff: 0.009999999999999998
          kl: 0.02148371086680064
          policy_loss: 0.012704957524935405
          total_loss: 0.7157475017839008
          vf_explained_var: 0.40724867582321167
          vf_loss: 0.7151328366663721
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,738,20298.4,738000,-34.642,-22,-51.9,346.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-22_01-24-13
  done: false
  episode_len_mean: 347.84
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.784000000000226
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2367
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1742154545254178
          entropy_coeff: 0.009999999999999998
          kl: 0.011681981048050242
          policy_loss: -0.09023740887641907
          total_loss: 1.1710772342152065
          vf_explained_var: -0.2870761454105377
          vf_loss: 1.2730568014913135
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,739,20315.9,739000,-34.784,-22,-51.9,347.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-22_01-24-28
  done: false
  episode_len_mean: 350.31
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -35.031000000000226
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2369
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1513534877035352
          entropy_coeff: 0.009999999999999998
          kl: 0.009864057661504451
          policy_loss: -0.07269783094525337
          total_loss: 1.0731863809956446
          vf_explained_var: 0.013335057534277439
          vf_loss: 1.157397749937243
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,740,20331.2,740000,-35.031,-22,-51.9,350.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-22_01-25-03
  done: false
  episode_len_mean: 353.83
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -35.38300000000024
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2371
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1401373068491618
          entropy_coeff: 0.009999999999999998
          kl: 0.00859046814820393
          policy_loss: -0.09046729604403178
          total_loss: 1.1927525725629595
          vf_explained_var: -0.10237909108400345
          vf_loss: 1.2946212408443292
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,741,20365.7,741000,-35.383,-22,-51.9,353.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-22_01-25-19
  done: false
  episode_len_mean: 358.45
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -35.84500000000024
  episode_reward_min: -51.90000000000047
  episodes_this_iter: 2
  episodes_total: 2373
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1367479893896315
          entropy_coeff: 0.009999999999999998
          kl: 0.0077505804986497
          policy_loss: -0.0843076906270451
          total_loss: 1.2006341331534915
          vf_explained_var: 0.10641070455312729
          vf_loss: 1.2963093052721686
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,742,20382,742000,-35.845,-22,-51.9,358.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-22_01-25-33
  done: false
  episode_len_mean: 363.47
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -36.34700000000025
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2375
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1814374883969625
          entropy_coeff: 0.009999999999999998
          kl: 0.008044300236222076
          policy_loss: -0.08573331915669971
          total_loss: 1.253946374853452
          vf_explained_var: 0.06254848092794418
          vf_loss: 1.3514940901348989
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,743,20396.2,743000,-36.347,-22,-54.9,363.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-22_01-25-49
  done: false
  episode_len_mean: 368.54
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -36.854000000000255
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2377
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1413881063461304
          entropy_coeff: 0.009999999999999998
          kl: 0.012672017189680041
          policy_loss: -0.08473944548103544
          total_loss: 1.0905622565084034
          vf_explained_var: -0.19932806491851807
          vf_loss: 1.1867155879735947
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,744,20411.8,744000,-36.854,-22,-54.9,368.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-22_01-26-04
  done: false
  episode_len_mean: 373.72
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -37.37200000000026
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2379
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.165791384379069
          entropy_coeff: 0.009999999999999998
          kl: 0.007360838361200095
          policy_loss: -0.06607741249932184
          total_loss: 1.1261780030197568
          vf_explained_var: 0.2920002341270447
          vf_loss: 1.203913329127762
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,745,20426.7,745000,-37.372,-22,-54.9,373.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-22_01-26-20
  done: false
  episode_len_mean: 379.06
  episode_media: {}
  episode_reward_max: -25.000000000000085
  episode_reward_mean: -37.90600000000027
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2381
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.197955584526062
          entropy_coeff: 0.009999999999999998
          kl: 0.009894628044134872
          policy_loss: -0.08208101971281899
          total_loss: 1.2275984436273575
          vf_explained_var: 0.22372356057167053
          vf_loss: 1.3216590294407473
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,746,20442.6,746000,-37.906,-25,-54.9,379.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-22_01-26-36
  done: false
  episode_len_mean: 383.01
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -38.30100000000028
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2383
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0110074864718708e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.167458889219496
          entropy_coeff: 0.009999999999999998
          kl: 0.025499926147784575
          policy_loss: -0.06609347677893108
          total_loss: 1.0099896980656518
          vf_explained_var: -0.19374972581863403
          vf_loss: 1.0877577605346838
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 747000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,747,20459,747000,-38.301,-27.9,-54.9,383.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-22_01-26-52
  done: false
  episode_len_mean: 385.81
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -38.58100000000028
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2385
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5165112297078057e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1853324161635506
          entropy_coeff: 0.009999999999999998
          kl: 0.01254703201043574
          policy_loss: -0.08304848058356179
          total_loss: 1.1978680494758818
          vf_explained_var: -0.10343014448881149
          vf_loss: 1.2927698411875301
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,748,20475,748000,-38.581,-27.9,-54.9,385.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-22_01-27-08
  done: false
  episode_len_mean: 388.81
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -38.881000000000284
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2387
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5165112297078057e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1960037178463405
          entropy_coeff: 0.009999999999999998
          kl: 0.006036983085512935
          policy_loss: -0.06796583963765038
          total_loss: 1.1930638217263752
          vf_explained_var: 0.08571143448352814
          vf_loss: 1.2729897016452418
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,749,20490.7,749000,-38.881,-27.9,-54.9,388.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-22_01-27-25
  done: false
  episode_len_mean: 391.65
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -39.16500000000028
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2389
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5165112297078057e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2316505736774868
          entropy_coeff: 0.009999999999999998
          kl: 0.012490049321164933
          policy_loss: -0.0738219150238567
          total_loss: 1.1387414117654164
          vf_explained_var: -0.11821239441633224
          vf_loss: 1.2248798271848096
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,750,20507.2,750000,-39.165,-27.9,-54.9,391.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-22_01-27-41
  done: false
  episode_len_mean: 395.9
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -39.590000000000295
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2392
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5165112297078057e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2740599009725782
          entropy_coeff: 0.009999999999999998
          kl: 0.02465350863294123
          policy_loss: 0.051869635780652366
          total_loss: 1.2715155094861985
          vf_explained_var: -0.04297211021184921
          vf_loss: 1.2323864852802622
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,751,20523.3,751000,-39.59,-27.9,-54.9,395.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-22_01-27-58
  done: false
  episode_len_mean: 398.8
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -39.8800000000003
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2394
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3011124345991347
          entropy_coeff: 0.009999999999999998
          kl: 0.01329350663252645
          policy_loss: 0.10946383641825783
          total_loss: 0.6940785451067819
          vf_explained_var: 0.37269604206085205
          vf_loss: 0.5976258375578456
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 752000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,752,20540.1,752000,-39.88,-27.9,-54.9,398.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-22_01-28-16
  done: false
  episode_len_mean: 401.06
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -40.10600000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2396
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.287440213892195
          entropy_coeff: 0.009999999999999998
          kl: 0.013248047838770205
          policy_loss: -0.08646040674712922
          total_loss: 1.2237265871630774
          vf_explained_var: 0.13041336834430695
          vf_loss: 1.3230613962643676
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,753,20558.4,753000,-40.106,-27.9,-54.9,401.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-22_01-28-33
  done: false
  episode_len_mean: 403.67
  episode_media: {}
  episode_reward_max: -27.900000000000126
  episode_reward_mean: -40.36700000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2398
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.274326287375556
          entropy_coeff: 0.009999999999999998
          kl: 0.014400150210581508
          policy_loss: -0.07248346424765056
          total_loss: 0.9973336637020112
          vf_explained_var: -0.11391039192676544
          vf_loss: 1.0825603865914875
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,754,20575.6,754000,-40.367,-27.9,-54.9,403.67




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-22_01-29-11
  done: false
  episode_len_mean: 406.73
  episode_media: {}
  episode_reward_max: -28.600000000000136
  episode_reward_mean: -40.673000000000314
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2401
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2265202244122824
          entropy_coeff: 0.009999999999999998
          kl: 0.00662087214885267
          policy_loss: 0.036262498299280806
          total_loss: 1.3000949952337477
          vf_explained_var: 0.20301055908203125
          vf_loss: 1.2760976889895068
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,755,20613.3,755000,-40.673,-28.6,-54.9,406.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-22_01-29-29
  done: false
  episode_len_mean: 408.92
  episode_media: {}
  episode_reward_max: -28.600000000000136
  episode_reward_mean: -40.89200000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2403
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1918701582484774
          entropy_coeff: 0.009999999999999998
          kl: 0.012306877367279551
          policy_loss: -0.082306293812063
          total_loss: 1.1546855055623584
          vf_explained_var: -0.21522346138954163
          vf_loss: 1.248910510705577
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,756,20631.8,756000,-40.892,-28.6,-54.9,408.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-22_01-29-49
  done: false
  episode_len_mean: 410.41
  episode_media: {}
  episode_reward_max: -28.600000000000136
  episode_reward_mean: -41.04100000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2406
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2026855640941196
          entropy_coeff: 0.009999999999999998
          kl: 0.008564753232907129
          policy_loss: 0.033830159985356864
          total_loss: 1.3177631901370155
          vf_explained_var: -0.21457701921463013
          vf_loss: 1.2959598792095979
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,757,20651.4,757000,-41.041,-28.6,-54.9,410.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-22_01-30-08
  done: false
  episode_len_mean: 411.45
  episode_media: {}
  episode_reward_max: -28.600000000000136
  episode_reward_mean: -41.145000000000316
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2409
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2747668445617085e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2137605614132352
          entropy_coeff: 0.009999999999999998
          kl: 0.004454213418811924
          policy_loss: 0.046393105139335
          total_loss: 1.2528906053966946
          vf_explained_var: 0.14446085691452026
          vf_loss: 1.2186350994639927
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,758,20671,758000,-41.145,-28.6,-54.9,411.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-22_01-30-30
  done: false
  episode_len_mean: 412.16
  episode_media: {}
  episode_reward_max: -29.400000000000148
  episode_reward_mean: -41.216000000000314
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2411
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1592434167861938
          entropy_coeff: 0.009999999999999998
          kl: 0.010531414778631445
          policy_loss: -0.05345697767204709
          total_loss: 1.3502545303768583
          vf_explained_var: 0.05354829505085945
          vf_loss: 1.4153039519985517
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,759,20692.2,759000,-41.216,-29.4,-54.9,412.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-22_01-30-52
  done: false
  episode_len_mean: 412.78
  episode_media: {}
  episode_reward_max: -30.600000000000165
  episode_reward_mean: -41.27800000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2414
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.104158259762658
          entropy_coeff: 0.009999999999999998
          kl: 0.007160181403789028
          policy_loss: -0.08379501187139088
          total_loss: 1.3224380135536193
          vf_explained_var: 0.2433459609746933
          vf_loss: 1.4172746035787793
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,760,20714.2,760000,-41.278,-30.6,-54.9,412.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-22_01-31-13
  done: false
  episode_len_mean: 413.19
  episode_media: {}
  episode_reward_max: -31.800000000000182
  episode_reward_mean: -41.319000000000315
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2417
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1373191197713217
          entropy_coeff: 0.009999999999999998
          kl: 0.01625995314180402
          policy_loss: -0.10443419631984499
          total_loss: 1.09997369547685
          vf_explained_var: 0.4318913221359253
          vf_loss: 1.21578106880188
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,761,20735.6,761000,-41.319,-31.8,-54.9,413.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-22_01-31-35
  done: false
  episode_len_mean: 412.95
  episode_media: {}
  episode_reward_max: -31.800000000000182
  episode_reward_mean: -41.29500000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2420
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0914204001426697
          entropy_coeff: 0.009999999999999998
          kl: 0.01706457986232803
          policy_loss: -0.1104479520685143
          total_loss: 1.069254834122128
          vf_explained_var: 0.5177959203720093
          vf_loss: 1.1906169917848375
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 762000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,762,20757.1,762000,-41.295,-31.8,-54.9,412.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-22_01-31-56
  done: false
  episode_len_mean: 411.42
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.142000000000316
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2423
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0947365668084887
          entropy_coeff: 0.009999999999999998
          kl: 0.01395461757802789
          policy_loss: -0.09722634587022994
          total_loss: 0.33316740211513307
          vf_explained_var: 0.8746168613433838
          vf_loss: 0.44134111768669554
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,763,20778.8,763000,-41.142,-31.2,-54.9,411.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-22_01-32-19
  done: false
  episode_len_mean: 409.93
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -40.99300000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2426
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1373834222808542e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0751817127068837
          entropy_coeff: 0.009999999999999998
          kl: 0.02966411199733735
          policy_loss: -0.02106894850730896
          total_loss: 1.0775079309940339
          vf_explained_var: 0.7295919060707092
          vf_loss: 1.1093286958005693
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,764,20801.3,764000,-40.993,-31.2,-54.9,409.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-22_01-32-40
  done: false
  episode_len_mean: 409.79
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -40.97900000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2429
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.706075133421282e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1989315973387824
          entropy_coeff: 0.009999999999999998
          kl: 0.024261901920944715
          policy_loss: 0.017782233075963126
          total_loss: 1.3413813147279952
          vf_explained_var: 0.41188162565231323
          vf_loss: 1.3355884108278486
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,765,20821.9,765000,-40.979,-31.2,-54.9,409.79




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-22_01-33-19
  done: false
  episode_len_mean: 409.05
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -40.90500000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2432
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.559112700131922e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2149240295092265
          entropy_coeff: 0.009999999999999998
          kl: 0.026521421038457997
          policy_loss: -0.03256997019052506
          total_loss: 1.3388769540521834
          vf_explained_var: 0.29263782501220703
          vf_loss: 1.3835961745844947
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,766,20861.1,766000,-40.905,-31.2,-54.9,409.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-22_01-33-37
  done: false
  episode_len_mean: 411.48
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.14800000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2435
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.838669050197884e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2938099092907376
          entropy_coeff: 0.009999999999999998
          kl: 0.011125382686653355
          policy_loss: 0.03323976405792766
          total_loss: 0.8653917193412781
          vf_explained_var: 0.21321508288383484
          vf_loss: 0.8450900487808718
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,767,20879.7,767000,-41.148,-31.2,-54.9,411.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-22_01-33-55
  done: false
  episode_len_mean: 413.22
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.322000000000315
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2437
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.838669050197884e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3000618881649442
          entropy_coeff: 0.009999999999999998
          kl: 0.021377411193669045
          policy_loss: 0.12392084432972802
          total_loss: 0.549396989080641
          vf_explained_var: 0.2539108395576477
          vf_loss: 0.4384767674323585
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,768,20896.8,768000,-41.322,-31.2,-54.9,413.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-22_01-34-11
  done: false
  episode_len_mean: 415.38
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.538000000000324
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2439
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.758003575296826e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.277407381269667
          entropy_coeff: 0.009999999999999998
          kl: 0.02502725564463625
          policy_loss: -0.06846090290281508
          total_loss: 0.7120901107788086
          vf_explained_var: 0.0975043848156929
          vf_loss: 0.7933250806604822
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,769,20912.7,769000,-41.538,-31.2,-54.9,415.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-22_01-34-28
  done: false
  episode_len_mean: 417.82
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.78200000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2441
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.637005362945239e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2722074707349142
          entropy_coeff: 0.009999999999999998
          kl: 0.020471773409869973
          policy_loss: -0.029895981980694666
          total_loss: 0.9128328952524397
          vf_explained_var: -0.18988625705242157
          vf_loss: 0.9554509253965484
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,770,20930.3,770000,-41.782,-31.2,-54.9,417.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-22_01-34-45
  done: false
  episode_len_mean: 419.43
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.94300000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2443
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2776930040783352
          entropy_coeff: 0.009999999999999998
          kl: 0.007541462503415156
          policy_loss: -0.05586922665437063
          total_loss: 0.9696749034855101
          vf_explained_var: 0.20801450312137604
          vf_loss: 1.0383210656543573
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,771,20946.8,771000,-41.943,-31.2,-54.9,419.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-22_01-35-03
  done: false
  episode_len_mean: 419.39
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.93900000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2446
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2461343963940938
          entropy_coeff: 0.009999999999999998
          kl: 0.014237123891196607
          policy_loss: 0.053607809046904244
          total_loss: 1.0915847665733762
          vf_explained_var: 0.0349661186337471
          vf_loss: 1.0504382725391124
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,772,20965.2,772000,-41.939,-31.2,-54.9,419.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-22_01-35-22
  done: false
  episode_len_mean: 418.92
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.89200000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2448
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2136724326345656
          entropy_coeff: 0.009999999999999998
          kl: 0.010269947608506098
          policy_loss: -0.07945379043618837
          total_loss: 0.8534910455346107
          vf_explained_var: 0.27067798376083374
          vf_loss: 0.9450815566711955
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,773,20984.3,773000,-41.892,-31.2,-54.9,418.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-22_01-35-42
  done: false
  episode_len_mean: 418.8
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.88000000000033
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2451
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1981139143308004
          entropy_coeff: 0.009999999999999998
          kl: 0.013692047060398869
          policy_loss: 0.030434646871354845
          total_loss: 0.8735854178667068
          vf_explained_var: 0.4388125240802765
          vf_loss: 0.8551318913905157
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,774,21003.7,774000,-41.88,-31.2,-54.9,418.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-22_01-36-01
  done: false
  episode_len_mean: 419.12
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.91200000000033
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 2
  episodes_total: 2453
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2249887612130907
          entropy_coeff: 0.009999999999999998
          kl: 0.016197945997491716
          policy_loss: -0.05462239550219642
          total_loss: 0.514288423789872
          vf_explained_var: 0.5916403532028198
          vf_loss: 0.5811606941330764
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,775,21023.3,775000,-41.912,-31.2,-54.9,419.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-22_01-36-19
  done: false
  episode_len_mean: 419.39
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.93900000000033
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2456
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295550804441786e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2822991596327888
          entropy_coeff: 0.009999999999999998
          kl: 0.04254829735770266
          policy_loss: 0.06366089921858575
          total_loss: 0.46481312099430294
          vf_explained_var: 0.8074567914009094
          vf_loss: 0.41397515961693393
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,776,21040.7,776000,-41.939,-31.2,-54.9,419.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-22_01-36-41
  done: false
  episode_len_mean: 417.54
  episode_media: {}
  episode_reward_max: -31.200000000000173
  episode_reward_mean: -41.754000000000325
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2459
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9433262066626784e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2677492751015558
          entropy_coeff: 0.009999999999999998
          kl: 0.018559902180108327
          policy_loss: 0.05208124352826012
          total_loss: 0.5345292564895417
          vf_explained_var: 0.8704081773757935
          vf_loss: 0.495125477678246
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,777,21062.7,777000,-41.754,-31.2,-54.9,417.54




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-22_01-37-19
  done: false
  episode_len_mean: 414.13
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -41.41300000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2462
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9433262066626784e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2143993298212687
          entropy_coeff: 0.009999999999999998
          kl: 0.012323141313510888
          policy_loss: 0.1046071466886335
          total_loss: 0.7513656953970591
          vf_explained_var: 0.6684598326683044
          vf_loss: 0.6589025184512138
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,778,21101.4,778000,-41.413,-31,-54.9,414.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-22_01-37-42
  done: false
  episode_len_mean: 410.92
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -41.09200000000032
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2465
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9433262066626784e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2658678823047214
          entropy_coeff: 0.009999999999999998
          kl: 0.013254921738925734
          policy_loss: 0.029612771173318227
          total_loss: 0.9750044372346666
          vf_explained_var: 0.6382842063903809
          vf_loss: 0.9580503264235125
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 779000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,779,21123.6,779000,-41.092,-31,-54.9,410.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-22_01-38-04
  done: false
  episode_len_mean: 406.82
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -40.682000000000315
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2468
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9433262066626784e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3405178361468846
          entropy_coeff: 0.009999999999999998
          kl: 0.014565029465819319
          policy_loss: 0.034320365554756586
          total_loss: 0.7981107900540034
          vf_explained_var: 0.605374276638031
          vf_loss: 0.7771955633742942
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,780,21145.9,780000,-40.682,-31,-54.9,406.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-22_01-38-25
  done: false
  episode_len_mean: 402.64
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -40.26400000000031
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2471
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9433262066626784e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3294641296068828
          entropy_coeff: 0.009999999999999998
          kl: 0.04379760737283291
          policy_loss: 0.029362997412681578
          total_loss: 0.7009480579031838
          vf_explained_var: 0.6929333806037903
          vf_loss: 0.6848796120948262
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 781000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,781,21167.4,781000,-40.264,-31,-54.9,402.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-22_01-38-46
  done: false
  episode_len_mean: 398.37
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -39.8370000000003
  episode_reward_min: -54.90000000000051
  episodes_this_iter: 3
  episodes_total: 2474
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9149893099940187e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3821535560819838
          entropy_coeff: 0.009999999999999998
          kl: 0.029543668559869316
          policy_loss: 0.05981844746404224
          total_loss: 1.1774580505159167
          vf_explained_var: 0.34001216292381287
          vf_loss: 1.1314610441525776
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,782,21187.8,782000,-39.837,-31,-54.9,398.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-22_01-39-07
  done: false
  episode_len_mean: 394.26
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -39.42600000000029
  episode_reward_min: -51.40000000000046
  episodes_this_iter: 2
  episodes_total: 2476
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.372483964991026e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4337273650699192
          entropy_coeff: 0.009999999999999998
          kl: 0.023589828381633596
          policy_loss: -0.07765453606843949
          total_loss: 0.9313523405128055
          vf_explained_var: 0.2091824859380722
          vf_loss: 1.023344055811564
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,783,21209.2,783000,-39.426,-31,-51.4,394.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-22_01-39-28
  done: false
  episode_len_mean: 390.08
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -39.00800000000029
  episode_reward_min: -51.40000000000046
  episodes_this_iter: 3
  episodes_total: 2479
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.558725947486541e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.370282730791304
          entropy_coeff: 0.009999999999999998
          kl: 0.02625307947733469
          policy_loss: -0.09457198969191975
          total_loss: 1.4804223034116957
          vf_explained_var: 0.1261487901210785
          vf_loss: 1.588696947362688
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,784,21229.9,784000,-39.008,-31,-51.4,390.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-22_01-39-48
  done: false
  episode_len_mean: 386.4
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.640000000000285
  episode_reward_min: -50.00000000000044
  episodes_this_iter: 3
  episodes_total: 2482
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.83808892122981e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3502033260133532
          entropy_coeff: 0.009999999999999998
          kl: 0.012228209772893992
          policy_loss: 0.05161990192201402
          total_loss: 1.2018459578355154
          vf_explained_var: -0.2246447205543518
          vf_loss: 1.1637279643780656
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 785000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,785,21250.1,785000,-38.64,-31,-50,386.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-22_01-40-10
  done: false
  episode_len_mean: 382.19
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.21900000000028
  episode_reward_min: -49.700000000000436
  episodes_this_iter: 3
  episodes_total: 2485
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.83808892122981e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3230030430687798
          entropy_coeff: 0.009999999999999998
          kl: 0.01342800696823099
          policy_loss: 0.07177926434410943
          total_loss: 1.0094784362448586
          vf_explained_var: 0.18570131063461304
          vf_loss: 0.9509290849789978
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,786,21271.6,786000,-38.219,-31,-49.7,382.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-22_01-40-30
  done: false
  episode_len_mean: 378.89
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.889000000000266
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2488
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.83808892122981e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3246928877300685
          entropy_coeff: 0.009999999999999998
          kl: 0.01602267592655314
          policy_loss: 0.05820992382036315
          total_loss: 1.0987873554229737
          vf_explained_var: 0.10954958945512772
          vf_loss: 1.0538242167896694
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 787000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,787,21291.5,787000,-37.889,-31,-47.8,378.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-22_01-41-09
  done: false
  episode_len_mean: 376.29
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.62900000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2490
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.83808892122981e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.407302639219496
          entropy_coeff: 0.009999999999999998
          kl: 0.07172656801736568
          policy_loss: -0.055458405199978085
          total_loss: 0.9464055154058668
          vf_explained_var: 0.16772277653217316
          vf_loss: 1.0159362475905154
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 788000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,788,21330.6,788000,-37.629,-31,-47.8,376.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-22_01-41-28
  done: false
  episode_len_mean: 373.63
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.36300000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2493
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4757133381844718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.432720629374186
          entropy_coeff: 0.009999999999999998
          kl: 0.01810567169790447
          policy_loss: 0.004833344866832097
          total_loss: 1.0638789706759983
          vf_explained_var: 0.0710982233285904
          vf_loss: 1.073372569349077
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 789000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,789,21350.1,789000,-37.363,-31,-47.8,373.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-22_01-41-48
  done: false
  episode_len_mean: 371.43
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.14300000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2496
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4757133381844718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4604196389516195
          entropy_coeff: 0.009999999999999998
          kl: 0.012196226725990932
          policy_loss: 0.04737471143404643
          total_loss: 1.173268078102006
          vf_explained_var: 0.21048617362976074
          vf_loss: 1.1404974105457464
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 790000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,790,21369.7,790000,-37.143,-31,-47.8,371.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-22_01-42-06
  done: false
  episode_len_mean: 370.81
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.08100000000025
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2498
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4757133381844718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4159727719095019
          entropy_coeff: 0.009999999999999998
          kl: 0.015157895722855032
          policy_loss: -0.09738114492760765
          total_loss: 1.1269196430842081
          vf_explained_var: 0.019285138696432114
          vf_loss: 1.2384602842645513
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 791000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,791,21388.1,791000,-37.081,-31,-47.8,370.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-22_01-42-25
  done: false
  episode_len_mean: 370.92
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.092000000000255
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2501
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4757133381844718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4863993750678168
          entropy_coeff: 0.009999999999999998
          kl: 0.018368757418201397
          policy_loss: 0.040339189685053296
          total_loss: 1.2354803403218588
          vf_explained_var: 0.047733280807733536
          vf_loss: 1.2100048893027835
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,792,21406.9,792000,-37.092,-31,-47.8,370.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-22_01-42-44
  done: false
  episode_len_mean: 370.32
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.03200000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2503
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4757133381844718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5675764083862305
          entropy_coeff: 0.009999999999999998
          kl: 0.022586869573116255
          policy_loss: -0.08296389579772949
          total_loss: 1.1288545946280162
          vf_explained_var: -0.25234565138816833
          vf_loss: 1.2274939069317448
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 793000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,793,21425.9,793000,-37.032,-31,-47.8,370.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-22_01-43-03
  done: false
  episode_len_mean: 371.19
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.11900000000025
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2506
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.213570007276707e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5455330782466465
          entropy_coeff: 0.009999999999999998
          kl: 0.024571237103541994
          policy_loss: 0.056676465769608816
          total_loss: 1.2520665172073575
          vf_explained_var: -0.11200506240129471
          vf_loss: 1.210844838205311
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 794000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,794,21444.5,794000,-37.119,-31,-47.8,371.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-22_01-43-22
  done: false
  episode_len_mean: 371.68
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.16800000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2508
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.320355010915061e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5213338123427498
          entropy_coeff: 0.009999999999999998
          kl: 0.022079705963412147
          policy_loss: -0.06323345883025064
          total_loss: 1.172417801287439
          vf_explained_var: 0.23104611039161682
          vf_loss: 1.2508638613753849
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,795,21463.3,795000,-37.168,-31,-47.8,371.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-22_01-43-41
  done: false
  episode_len_mean: 372.62
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.26200000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2511
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5641161772939893
          entropy_coeff: 0.009999999999999998
          kl: 0.01626432412786506
          policy_loss: 0.043831563078694874
          total_loss: 1.2513967000775867
          vf_explained_var: -0.037449996918439865
          vf_loss: 1.2232054777443409
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 796000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,796,21482.5,796000,-37.262,-31,-47.8,372.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-22_01-43-59
  done: false
  episode_len_mean: 374.22
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.42200000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2513
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5623545858595107
          entropy_coeff: 0.009999999999999998
          kl: 0.010527284648393396
          policy_loss: -0.07833299007680682
          total_loss: 1.1109460684988233
          vf_explained_var: -0.27538761496543884
          vf_loss: 1.2049020718369219
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,797,21500.8,797000,-37.422,-31,-47.8,374.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-22_01-44-18
  done: false
  episode_len_mean: 375.83
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.58300000000026
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2516
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5517607993549771
          entropy_coeff: 0.009999999999999998
          kl: 0.011741625275422506
          policy_loss: 0.04501663578881158
          total_loss: 1.2923035671313603
          vf_explained_var: 0.0545513890683651
          vf_loss: 1.2628039539688163
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 798000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,798,21519.9,798000,-37.583,-31,-47.8,375.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-22_01-44-35
  done: false
  episode_len_mean: 378.16
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.816000000000265
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2518
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6509931749767728
          entropy_coeff: 0.009999999999999998
          kl: 0.019991106974748766
          policy_loss: 0.10418388197819392
          total_loss: 0.6874715235498217
          vf_explained_var: -0.5340603590011597
          vf_loss: 0.5997965723483099
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,799,21537,799000,-37.816,-31,-47.8,378.16




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-22_01-45-12
  done: false
  episode_len_mean: 378.46
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.84600000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2520
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5991841263241238
          entropy_coeff: 0.009999999999999998
          kl: 0.017676962469365107
          policy_loss: -0.08297696659962336
          total_loss: 1.1953297164705066
          vf_explained_var: -0.26676782965660095
          vf_loss: 1.29429765459564
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,800,21573.9,800000,-37.846,-31,-47.8,378.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-22_01-45-33
  done: false
  episode_len_mean: 380.01
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.00100000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2523
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.554511296749115
          entropy_coeff: 0.009999999999999998
          kl: 0.010730621602605656
          policy_loss: -0.09806029183997048
          total_loss: 1.777900559372372
          vf_explained_var: 0.03646737337112427
          vf_loss: 1.8915054546462164
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,801,21594.5,801000,-38.001,-31,-47.8,380.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-22_01-45-53
  done: false
  episode_len_mean: 381.21
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.12100000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2526
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.980532516372591e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.5881941027111477
          entropy_coeff: 0.009999999999999998
          kl: 0.025089727170053224
          policy_loss: 0.03836205013924175
          total_loss: 1.2283285511864557
          vf_explained_var: -0.060804132372140884
          vf_loss: 1.2058472134172917
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,802,21614.8,802000,-38.121,-31,-47.8,381.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-22_01-46-12
  done: false
  episode_len_mean: 382.06
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.20600000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2529
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.47079877455889e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.564037197166019
          entropy_coeff: 0.009999999999999998
          kl: 0.021457156190588675
          policy_loss: 0.06140478509995673
          total_loss: 1.1010740492078992
          vf_explained_var: 0.032332468777894974
          vf_loss: 1.0553080293867323
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,803,21634,803000,-38.206,-31,-47.8,382.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-22_01-46-32
  done: false
  episode_len_mean: 382.92
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.29200000000028
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2531
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011206198161838332
          cur_lr: 5.000000000000001e-05
          entropy: 1.5447575357225207
          entropy_coeff: 0.009999999999999998
          kl: 0.030283572920626313
          policy_loss: -0.09024521956841151
          total_loss: 1.1797465638981925
          vf_explained_var: -0.2562144696712494
          vf_loss: 1.2854359671887425
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,804,21653.8,804000,-38.292,-31,-47.8,382.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-22_01-46-52
  done: false
  episode_len_mean: 383.72
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.37200000000028
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2534
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016809297242757496
          cur_lr: 5.000000000000001e-05
          entropy: 1.5696585959858365
          entropy_coeff: 0.009999999999999998
          kl: 0.02193244682109336
          policy_loss: 0.03983983968695005
          total_loss: 1.3319200972716014
          vf_explained_var: 0.050644401460886
          vf_loss: 1.3077731437153286
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 805000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,805,21673.5,805000,-38.372,-31,-47.8,383.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-22_01-47-12
  done: false
  episode_len_mean: 382.92
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.29200000000027
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2536
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6049963633219402
          entropy_coeff: 0.009999999999999998
          kl: 0.008182585460451738
          policy_loss: -0.0797537429465188
          total_loss: 1.238258844614029
          vf_explained_var: -0.24031005799770355
          vf_loss: 1.334060491952631
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 806000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,806,21693.3,806000,-38.292,-31,-47.8,382.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-22_01-47-30
  done: false
  episode_len_mean: 381.57
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.157000000000274
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 3
  episodes_total: 2539
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5776290403472053
          entropy_coeff: 0.009999999999999998
          kl: 0.010516706537677223
          policy_loss: 0.05331208854913712
          total_loss: 1.2791191495127148
          vf_explained_var: -0.17610687017440796
          vf_loss: 1.2415806950794326
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,807,21711.5,807000,-38.157,-31,-47.8,381.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-22_01-47-50
  done: false
  episode_len_mean: 380.16
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -38.016000000000275
  episode_reward_min: -47.80000000000041
  episodes_this_iter: 2
  episodes_total: 2541
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6102988640467326
          entropy_coeff: 0.009999999999999998
          kl: 0.011366339650069513
          policy_loss: -0.09985959728558859
          total_loss: 1.265913184814983
          vf_explained_var: 0.10694222897291183
          vf_loss: 1.38187289569113
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 808000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,808,21731.2,808000,-38.016,-31,-47.8,380.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-22_01-48-08
  done: false
  episode_len_mean: 378.8
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.88000000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2544
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5542693893114725
          entropy_coeff: 0.009999999999999998
          kl: 0.0064343751069841125
          policy_loss: 0.04587423569626278
          total_loss: 1.354625133342213
          vf_explained_var: -0.13489560782909393
          vf_loss: 1.3242919852336248
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,809,21749.5,809000,-37.88,-31,-45.2,378.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-22_01-48-26
  done: false
  episode_len_mean: 378.44
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.84400000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 2
  episodes_total: 2546
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5225726299815707
          entropy_coeff: 0.009999999999999998
          kl: 0.008307235775983359
          policy_loss: -0.0875819186369578
          total_loss: 1.1622890369759666
          vf_explained_var: -0.24201828241348267
          vf_loss: 1.2650946021080016
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 810000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,810,21767.8,810000,-37.844,-31,-45.2,378.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-22_01-48-44
  done: false
  episode_len_mean: 378.51
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.85100000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2549
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5056917548179627
          entropy_coeff: 0.009999999999999998
          kl: 0.01292492734113553
          policy_loss: 0.05239583899577459
          total_loss: 1.3342532164520688
          vf_explained_var: -0.014497602358460426
          vf_loss: 1.2969110366370944
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 811000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,811,21785.6,811000,-37.851,-31,-45.2,378.51




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-22_01-49-21
  done: false
  episode_len_mean: 377.87
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.78700000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 2
  episodes_total: 2551
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4891405132081774
          entropy_coeff: 0.009999999999999998
          kl: 0.014934542748212229
          policy_loss: -0.0966771441201369
          total_loss: 1.0098881383736928
          vf_explained_var: 0.20395834743976593
          vf_loss: 1.1214529242780473
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,812,21822.6,812000,-37.787,-31,-45.2,377.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-22_01-49-42
  done: false
  episode_len_mean: 377.51
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.75100000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2554
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4333633621533712
          entropy_coeff: 0.009999999999999998
          kl: 0.012232365501392116
          policy_loss: -0.06206781797938877
          total_loss: 1.4291898561848535
          vf_explained_var: 0.04153648391366005
          vf_loss: 1.5055882255236308
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 813000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,813,21843.1,813000,-37.751,-31,-45.2,377.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-22_01-50-03
  done: false
  episode_len_mean: 376.69
  episode_media: {}
  episode_reward_max: -31.00000000000017
  episode_reward_mean: -37.66900000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2557
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.437695531050364
          entropy_coeff: 0.009999999999999998
          kl: 0.007858559390599101
          policy_loss: 0.034834287729528214
          total_loss: 1.3091978460550309
          vf_explained_var: -0.03863495588302612
          vf_loss: 1.288738524582651
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,814,21864.8,814000,-37.669,-31,-45.2,376.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-22_01-50-24
  done: false
  episode_len_mean: 377.45
  episode_media: {}
  episode_reward_max: -31.60000000000018
  episode_reward_mean: -37.74500000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2560
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4324225200547112
          entropy_coeff: 0.009999999999999998
          kl: 0.012899336536503844
          policy_loss: 0.047788777947425844
          total_loss: 1.341404269138972
          vf_explained_var: -0.2126956582069397
          vf_loss: 1.3079364523291588
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 815000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,815,21885.7,815000,-37.745,-31.6,-45.2,377.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-22_01-50-44
  done: false
  episode_len_mean: 377.89
  episode_media: {}
  episode_reward_max: -31.60000000000018
  episode_reward_mean: -37.78900000000026
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 2
  episodes_total: 2562
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002521394586413625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3770967695448133
          entropy_coeff: 0.009999999999999998
          kl: 0.021096764709301753
          policy_loss: -0.08124646246433258
          total_loss: 1.1447206450833214
          vf_explained_var: -0.08290696144104004
          vf_loss: 1.2397327611843745
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,816,21905.5,816000,-37.789,-31.6,-45.2,377.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-22_01-51-07
  done: false
  episode_len_mean: 377.85
  episode_media: {}
  episode_reward_max: -31.900000000000183
  episode_reward_mean: -37.78500000000027
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2565
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3239861382378473
          entropy_coeff: 0.009999999999999998
          kl: 0.016595353125451812
          policy_loss: -0.10623683118157917
          total_loss: 1.691254625055525
          vf_explained_var: 0.07744915038347244
          vf_loss: 1.8107250544759963
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 817000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,817,21927.9,817000,-37.785,-31.9,-45.2,377.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-22_01-51-30
  done: false
  episode_len_mean: 376.96
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.696000000000254
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 2569
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3234380563100179
          entropy_coeff: 0.009999999999999998
          kl: 0.01294295601560683
          policy_loss: 0.013513236741224925
          total_loss: 1.7996498889393278
          vf_explained_var: 0.047118235379457474
          vf_loss: 1.7993661522865296
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 818000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,818,21951.7,818000,-37.696,-28.1,-45.2,376.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-22_01-51-54
  done: false
  episode_len_mean: 375.99
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.59900000000026
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2572
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.334876463148329
          entropy_coeff: 0.009999999999999998
          kl: 0.010532711818087452
          policy_loss: 0.05472317950593101
          total_loss: 1.139917128947046
          vf_explained_var: -0.20885160565376282
          vf_loss: 1.0985387214356
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 819000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,819,21975.2,819000,-37.599,-28.1,-45.2,375.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-22_01-52-16
  done: false
  episode_len_mean: 375.2
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.520000000000266
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2575
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3565011594030592
          entropy_coeff: 0.009999999999999998
          kl: 0.012539837996464421
          policy_loss: 0.03662473600771692
          total_loss: 1.2969544139173297
          vf_explained_var: 0.0103904465213418
          vf_loss: 1.2738899351822006
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,820,21997.1,820000,-37.52,-28.1,-45.2,375.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-22_01-52-37
  done: false
  episode_len_mean: 374.66
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.466000000000264
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2578
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3868580063184102
          entropy_coeff: 0.009999999999999998
          kl: 0.012000340240110399
          policy_loss: 0.04562078035540051
          total_loss: 1.254201266169548
          vf_explained_var: 0.04226253926753998
          vf_loss: 1.2224445104599
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,821,22018.5,821000,-37.466,-28.1,-45.2,374.66




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-22_01-53-17
  done: false
  episode_len_mean: 373.9
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.39000000000026
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2581
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.387612533569336
          entropy_coeff: 0.009999999999999998
          kl: 0.012495728878049504
          policy_loss: 0.05326078666581048
          total_loss: 1.234594342443678
          vf_explained_var: 0.02370302937924862
          vf_loss: 1.1952049526903363
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,822,22057.9,822000,-37.39,-28.1,-45.2,373.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-22_01-53-39
  done: false
  episode_len_mean: 373.09
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.30900000000025
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2584
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.4142893367343479
          entropy_coeff: 0.009999999999999998
          kl: 0.007509721998099339
          policy_loss: 0.08012284669611189
          total_loss: 0.9520246873299281
          vf_explained_var: 0.06474027037620544
          vf_loss: 0.8860418880979221
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,823,22080.5,823000,-37.309,-28.1,-45.2,373.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-22_01-54-02
  done: false
  episode_len_mean: 372.35
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.23500000000026
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2587
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.431177306175232
          entropy_coeff: 0.009999999999999998
          kl: 0.008424136767654982
          policy_loss: 0.04100365804301368
          total_loss: 1.253324465619193
          vf_explained_var: -0.20271651446819305
          vf_loss: 1.226629403233528
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,824,22102.7,824000,-37.235,-28.1,-45.2,372.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-22_01-54-24
  done: false
  episode_len_mean: 371.46
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -37.14600000000026
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2590
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.4305742131339179
          entropy_coeff: 0.009999999999999998
          kl: 0.007711250204068184
          policy_loss: 0.04317005798220634
          total_loss: 1.308679269750913
          vf_explained_var: 0.14787112176418304
          vf_loss: 1.2798120140201517
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,825,22125.3,825000,-37.146,-28.1,-45.2,371.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-22_01-54-48
  done: false
  episode_len_mean: 369.58
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -36.958000000000254
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3996828940179613
          entropy_coeff: 0.009999999999999998
          kl: 0.011840361037982285
          policy_loss: -0.11089935360683335
          total_loss: 1.6665348953670927
          vf_explained_var: 0.06278853118419647
          vf_loss: 1.791426600350274
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,826,22149.1,826000,-36.958,-28.1,-45.2,369.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-22_01-55-12
  done: false
  episode_len_mean: 368.21
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -36.821000000000254
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2596
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3888545817799038
          entropy_coeff: 0.009999999999999998
          kl: 0.008042274547427534
          policy_loss: -0.11444599446323182
          total_loss: 1.4937682999504938
          vf_explained_var: 0.1431250274181366
          vf_loss: 1.6220998062027825
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,827,22173,827000,-36.821,-28.1,-45.2,368.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-22_01-55-35
  done: false
  episode_len_mean: 364.46
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -36.44600000000025
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 2600
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.3627324144045512
          entropy_coeff: 0.009999999999999998
          kl: 0.00596296716259707
          policy_loss: 0.012596669130855137
          total_loss: 1.6879001100858053
          vf_explained_var: 0.10165458172559738
          vf_loss: 1.6889285378985934
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,828,22195.9,828000,-36.446,-28.1,-45.2,364.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-22_01-56-00
  done: false
  episode_len_mean: 361.17
  episode_media: {}
  episode_reward_max: -28.10000000000013
  episode_reward_mean: -36.117000000000246
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2603
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.364479394753774
          entropy_coeff: 0.009999999999999998
          kl: 0.01554285478461834
          policy_loss: 0.025822659499115413
          total_loss: 1.0786890361044141
          vf_explained_var: -0.002336222678422928
          vf_loss: 1.066505283779568
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,829,22221.1,829000,-36.117,-28.1,-45.2,361.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-22_01-56-25
  done: false
  episode_len_mean: 356.26
  episode_media: {}
  episode_reward_max: -28.000000000000128
  episode_reward_mean: -35.62600000000023
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 2607
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.2687390168507895
          entropy_coeff: 0.009999999999999998
          kl: 0.01866256843077478
          policy_loss: 0.03656123388144705
          total_loss: 1.2851938671535916
          vf_explained_var: 0.17196659743785858
          vf_loss: 1.2613129728370243
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,830,22245.7,830000,-35.626,-28,-45.2,356.26




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-22_01-57-07
  done: false
  episode_len_mean: 352.34
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -35.23400000000023
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 2610
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.2174971474541558
          entropy_coeff: 0.009999999999999998
          kl: 0.014755310852484271
          policy_loss: 0.0023412153952651555
          total_loss: 0.9192972713046603
          vf_explained_var: 0.2923239469528198
          vf_loss: 0.9291254507170783
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,831,22288,831000,-35.234,-24.2,-45.2,352.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-22_01-57-37
  done: false
  episode_len_mean: 346.79
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -34.679000000000215
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 2614
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.122188385327657
          entropy_coeff: 0.009999999999999998
          kl: 0.017211755913583537
          policy_loss: 0.03294735542602009
          total_loss: 1.53271926773919
          vf_explained_var: 0.1888170838356018
          vf_loss: 1.510987267229292
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 832000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,832,22318.1,832000,-34.679,-24.2,-45.2,346.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-22_01-58-04
  done: false
  episode_len_mean: 340.25
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -34.025000000000205
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2618
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003782091879620437
          cur_lr: 5.000000000000001e-05
          entropy: 1.0557428492440117
          entropy_coeff: 0.009999999999999998
          kl: 0.004895076647457586
          policy_loss: 0.021515768435266282
          total_loss: 1.6156230754322476
          vf_explained_var: 0.14713451266288757
          vf_loss: 1.6046629005008273
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,833,22344.8,833000,-34.025,-24.2,-43.1,340.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-22_01-58-34
  done: false
  episode_len_mean: 334.95
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -33.49500000000021
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2622
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.132310895125071
          entropy_coeff: 0.009999999999999998
          kl: 0.018513605634334143
          policy_loss: 0.022908721450302337
          total_loss: 1.2639381210009257
          vf_explained_var: 0.2705625295639038
          vf_loss: 1.2523490203751457
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,834,22375.3,834000,-33.495,-24.2,-43.1,334.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-22_01-59-02
  done: false
  episode_len_mean: 330.58
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -33.0580000000002
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2626
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1058510859807333
          entropy_coeff: 0.009999999999999998
          kl: 0.008407060140873628
          policy_loss: -0.006755196965403027
          total_loss: 0.8104083028104571
          vf_explained_var: 0.6529306769371033
          vf_loss: 0.828220416771041
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,835,22403.2,835000,-33.058,-24.2,-43.1,330.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-22_01-59-30
  done: false
  episode_len_mean: 325.86
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -32.5860000000002
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2630
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1840363131629097
          entropy_coeff: 0.009999999999999998
          kl: 0.006476655042496857
          policy_loss: -0.02686585063735644
          total_loss: 0.9727113273408677
          vf_explained_var: 0.5589370131492615
          vf_loss: 1.0114162968264686
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 836000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,836,22430.4,836000,-32.586,-24.2,-43.1,325.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-22_01-59-58
  done: false
  episode_len_mean: 319.98
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -31.99800000000018
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2634
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.2021010902192857
          entropy_coeff: 0.009999999999999998
          kl: 0.009226667976645252
          policy_loss: 0.033482274909814196
          total_loss: 1.3252637578381432
          vf_explained_var: 0.40669873356819153
          vf_loss: 1.303800747791926
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,837,22458.8,837000,-31.998,-24.2,-43.1,319.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-22_02-00-25
  done: false
  episode_len_mean: 314.6
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -31.46000000000017
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2638
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1670732723342048
          entropy_coeff: 0.009999999999999998
          kl: 0.006049836822188167
          policy_loss: 0.022598160803318022
          total_loss: 1.240070969528622
          vf_explained_var: 0.4128708243370056
          vf_loss: 1.2291423870457543
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,838,22485.7,838000,-31.46,-24.2,-43.1,314.6




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-22_02-01-12
  done: false
  episode_len_mean: 308.75
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -30.875000000000167
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 2642
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1242484264903598
          entropy_coeff: 0.009999999999999998
          kl: 0.01181704516305615
          policy_loss: 0.036124643021159704
          total_loss: 0.7096808814340168
          vf_explained_var: 0.7133316993713379
          vf_loss: 0.6847964839802848
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,839,22533.1,839000,-30.875,-21.9,-43.1,308.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-22_02-01-40
  done: false
  episode_len_mean: 302.25
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -30.225000000000158
  episode_reward_min: -42.00000000000033
  episodes_this_iter: 4
  episodes_total: 2646
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1021735986073813
          entropy_coeff: 0.009999999999999998
          kl: 0.017915628167533507
          policy_loss: 0.04151699327760273
          total_loss: 0.6704351117213567
          vf_explained_var: 0.76898592710495
          vf_loss: 0.6399364660183589
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 840000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,840,22560.9,840000,-30.225,-21.9,-42,302.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-22_02-02-08
  done: false
  episode_len_mean: 298.07
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -29.807000000000148
  episode_reward_min: -41.200000000000315
  episodes_this_iter: 3
  episodes_total: 2649
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.2181847678290474
          entropy_coeff: 0.009999999999999998
          kl: 0.01607729986861553
          policy_loss: -0.0591417805188232
          total_loss: 0.5951468681295713
          vf_explained_var: 0.7469779253005981
          vf_loss: 0.6664674546983507
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 841000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,841,22588.6,841000,-29.807,-21.9,-41.2,298.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-22_02-02-36
  done: false
  episode_len_mean: 293.84
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -29.384000000000142
  episode_reward_min: -41.200000000000315
  episodes_this_iter: 4
  episodes_total: 2653
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.1520569814576043
          entropy_coeff: 0.009999999999999998
          kl: 0.012469320096181175
          policy_loss: -0.013702972253991499
          total_loss: 0.16853385952611763
          vf_explained_var: 0.9624605774879456
          vf_loss: 0.193755045450396
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 842000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,842,22616.6,842000,-29.384,-21.9,-41.2,293.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-22_02-03-03
  done: false
  episode_len_mean: 289.66
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.966000000000136
  episode_reward_min: -41.200000000000315
  episodes_this_iter: 4
  episodes_total: 2657
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018910459398102186
          cur_lr: 5.000000000000001e-05
          entropy: 1.037934907939699
          entropy_coeff: 0.009999999999999998
          kl: 0.04165919211665804
          policy_loss: 0.005724096794923147
          total_loss: 1.1578440662887362
          vf_explained_var: 0.7913206219673157
          vf_loss: 1.162491457329856
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,843,22643.6,843000,-28.966,-21.9,-41.2,289.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-22_02-03-32
  done: false
  episode_len_mean: 285.32
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.53200000000014
  episode_reward_min: -41.200000000000315
  episodes_this_iter: 4
  episodes_total: 2661
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002836568909715328
          cur_lr: 5.000000000000001e-05
          entropy: 0.9915774047374726
          entropy_coeff: 0.009999999999999998
          kl: 0.016336035760501293
          policy_loss: -0.043450664149390326
          total_loss: 0.2858332332637575
          vf_explained_var: 0.9256868362426758
          vf_loss: 0.33919504202074474
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,844,22672.6,844000,-28.532,-21.9,-41.2,285.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-22_02-04-02
  done: false
  episode_len_mean: 281.03
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.10300000000013
  episode_reward_min: -34.800000000000225
  episodes_this_iter: 4
  episodes_total: 2665
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002836568909715328
          cur_lr: 5.000000000000001e-05
          entropy: 0.976716935634613
          entropy_coeff: 0.009999999999999998
          kl: 0.010526467309396834
          policy_loss: -0.005065185949206352
          total_loss: 0.35628619492053987
          vf_explained_var: 0.9109245538711548
          vf_loss: 0.37111555967066023
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,845,22702.2,845000,-28.103,-21.9,-34.8,281.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-22_02-04-30
  done: false
  episode_len_mean: 278.65
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.865000000000133
  episode_reward_min: -34.800000000000225
  episodes_this_iter: 4
  episodes_total: 2669
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002836568909715328
          cur_lr: 5.000000000000001e-05
          entropy: 0.9690140465895335
          entropy_coeff: 0.009999999999999998
          kl: 0.00866195187217487
          policy_loss: 0.006225168208281199
          total_loss: 0.6436977452701993
          vf_explained_var: 0.7839339971542358
          vf_loss: 0.6471602592203353
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,846,22730.2,846000,-27.865,-21.9,-34.8,278.65




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-22_02-05-16
  done: false
  episode_len_mean: 275.6
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.560000000000123
  episode_reward_min: -34.800000000000225
  episodes_this_iter: 4
  episodes_total: 2673
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002836568909715328
          cur_lr: 5.000000000000001e-05
          entropy: 0.9788329965538449
          entropy_coeff: 0.009999999999999998
          kl: 0.004693842278172787
          policy_loss: -0.11185667945279015
          total_loss: 0.4545924077431361
          vf_explained_var: 0.7953767776489258
          vf_loss: 0.5762360897329118
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,847,22776.3,847000,-27.56,-21.7,-34.8,275.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-22_02-05-43
  done: false
  episode_len_mean: 272.5
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.250000000000114
  episode_reward_min: -34.800000000000225
  episodes_this_iter: 4
  episodes_total: 2677
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 0.9935532490412394
          entropy_coeff: 0.009999999999999998
          kl: 0.011202263242878431
          policy_loss: -0.005961894078387154
          total_loss: 0.7842693424887127
          vf_explained_var: 0.6153061985969543
          vf_loss: 0.8001651952664057
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 848000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,848,22804,848000,-27.25,-21.7,-34.8,272.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-22_02-06-10
  done: false
  episode_len_mean: 269.8
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.98000000000011
  episode_reward_min: -34.30000000000022
  episodes_this_iter: 4
  episodes_total: 2681
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 0.9847895642121632
          entropy_coeff: 0.009999999999999998
          kl: 0.011955707572862274
          policy_loss: 0.012299283842245738
          total_loss: 1.1945244060622322
          vf_explained_var: 0.2629954516887665
          vf_loss: 1.1920713325341543
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,849,22830.1,849000,-26.98,-21.7,-34.3,269.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-22_02-06-37
  done: false
  episode_len_mean: 267.12
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.712000000000103
  episode_reward_min: -34.30000000000022
  episodes_this_iter: 4
  episodes_total: 2685
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 0.9566688299179077
          entropy_coeff: 0.009999999999999998
          kl: 0.0062702477761724315
          policy_loss: 0.010526926981078254
          total_loss: 1.0015845305389828
          vf_explained_var: 0.34687289595603943
          vf_loss: 1.0006234129269918
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,850,22857.9,850000,-26.712,-21.7,-34.3,267.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-22_02-07-04
  done: false
  episode_len_mean: 264.44
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.444000000000106
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 2689
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 0.9725905305809445
          entropy_coeff: 0.009999999999999998
          kl: 0.01263836272114559
          policy_loss: 0.023710578183333077
          total_loss: 0.9917888151274787
          vf_explained_var: 0.28442761301994324
          vf_loss: 0.9778023660182953
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,851,22884.9,851000,-26.444,-21.7,-34.1,264.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-22_02-07-32
  done: false
  episode_len_mean: 262.06
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.206000000000103
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 2693
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 0.9977321796947055
          entropy_coeff: 0.009999999999999998
          kl: 0.019092378686276293
          policy_loss: 0.04104532599449158
          total_loss: 1.2886029216978285
          vf_explained_var: 0.1269601583480835
          vf_loss: 1.2575322111447653
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,852,22912.9,852000,-26.206,-21.7,-34.1,262.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-22_02-08-00
  done: false
  episode_len_mean: 259.31
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.931000000000093
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 2697
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 1.0868634184201558
          entropy_coeff: 0.009999999999999998
          kl: 0.007509366457048195
          policy_loss: 0.02805032862557305
          total_loss: 1.03916746907764
          vf_explained_var: 0.37435200810432434
          vf_loss: 1.0219847049978044
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 853000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,853,22940.8,853000,-25.931,-21.7,-32.8,259.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-22_02-08-45
  done: false
  episode_len_mean: 256.82
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.682000000000095
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2701
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 1.064387802945243
          entropy_coeff: 0.009999999999999998
          kl: 0.006683907580070465
          policy_loss: 0.004225871711969376
          total_loss: 0.802900575266944
          vf_explained_var: 0.6086561679840088
          vf_loss: 0.8093176265557607
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 854000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,854,22985.8,854000,-25.682,-21.7,-29.3,256.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-22_02-09-13
  done: false
  episode_len_mean: 256.39
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.63900000000009
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 3
  episodes_total: 2704
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001418284454857664
          cur_lr: 5.000000000000001e-05
          entropy: 1.186534869670868
          entropy_coeff: 0.009999999999999998
          kl: 0.033625815413236164
          policy_loss: -0.03564181733462546
          total_loss: 1.1700545012950898
          vf_explained_var: 0.34797555208206177
          vf_loss: 1.2175569150182937
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 855000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,855,23013,855000,-25.639,-21.7,-29.3,256.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-22_02-09-41
  done: false
  episode_len_mean: 255.22
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.522000000000094
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2708
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00021274266822864956
          cur_lr: 5.000000000000001e-05
          entropy: 1.255737014611562
          entropy_coeff: 0.009999999999999998
          kl: 0.018319564497371408
          policy_loss: -0.05639324962264962
          total_loss: 1.000481806198756
          vf_explained_var: 0.5300309062004089
          vf_loss: 1.069428527355194
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 856000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,856,23041.5,856000,-25.522,-21.7,-28.6,255.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-22_02-10-07
  done: false
  episode_len_mean: 255.13
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.513000000000087
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2712
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00021274266822864956
          cur_lr: 5.000000000000001e-05
          entropy: 1.2723510106404623
          entropy_coeff: 0.009999999999999998
          kl: 0.004605433978361538
          policy_loss: -0.003952969776259529
          total_loss: 1.1446108957131704
          vf_explained_var: 0.5008697509765625
          vf_loss: 1.1612863944636451
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 857000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,857,23067.9,857000,-25.513,-21.7,-28.6,255.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-22_02-10-33
  done: false
  episode_len_mean: 255.31
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.53100000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 4
  episodes_total: 2716
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010637133411432478
          cur_lr: 5.000000000000001e-05
          entropy: 1.2938858456081814
          entropy_coeff: 0.009999999999999998
          kl: 0.014008915472851255
          policy_loss: 0.02280259115828408
          total_loss: 1.2960101140869988
          vf_explained_var: 0.3069379925727844
          vf_loss: 1.28614492615064
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 858000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,858,23093.9,858000,-25.531,-21.7,-28.6,255.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-22_02-11-00
  done: false
  episode_len_mean: 255.75
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.57500000000009
  episode_reward_min: -28.600000000000136
  episodes_this_iter: 3
  episodes_total: 2719
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010637133411432478
          cur_lr: 5.000000000000001e-05
          entropy: 1.2371765706274245
          entropy_coeff: 0.009999999999999998
          kl: 0.010994202629294689
          policy_loss: -0.11817941317955653
          total_loss: 1.1002791265646616
          vf_explained_var: 0.305297315120697
          vf_loss: 1.2308291289541455
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 859000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,859,23120.6,859000,-25.575,-21.7,-28.6,255.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-22_02-11-27
  done: false
  episode_len_mean: 256.48
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.648000000000096
  episode_reward_min: -28.90000000000014
  episodes_this_iter: 4
  episodes_total: 2723
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010637133411432478
          cur_lr: 5.000000000000001e-05
          entropy: 1.2824715561336941
          entropy_coeff: 0.009999999999999998
          kl: 0.023498499303684008
          policy_loss: 0.003003009036183357
          total_loss: 1.0565338651339213
          vf_explained_var: 0.20664650201797485
          vf_loss: 1.066353072391616
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 860000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,860,23147.7,860000,-25.648,-21.7,-28.9,256.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-22_02-11-55
  done: false
  episode_len_mean: 257.01
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.70100000000009
  episode_reward_min: -28.90000000000014
  episodes_this_iter: 4
  episodes_total: 2727
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00015955700117148723
          cur_lr: 5.000000000000001e-05
          entropy: 1.2714214338196648
          entropy_coeff: 0.009999999999999998
          kl: 0.020543935350429858
          policy_loss: 0.01081872582435608
          total_loss: 1.1666644586457147
          vf_explained_var: 0.2546869218349457
          vf_loss: 1.1685566729969448
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,861,23175.3,861000,-25.701,-21.7,-28.9,257.01




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-22_02-12-40
  done: false
  episode_len_mean: 257.19
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.719000000000097
  episode_reward_min: -28.90000000000014
  episodes_this_iter: 4
  episodes_total: 2731
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023933550175723073
          cur_lr: 5.000000000000001e-05
          entropy: 1.2566503233379789
          entropy_coeff: 0.009999999999999998
          kl: 0.009494329304374263
          policy_loss: 0.023705382562345927
          total_loss: 1.1986589358912574
          vf_explained_var: 0.26498255133628845
          vf_loss: 1.1875177986092038
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,862,23220.6,862000,-25.719,-21.7,-28.9,257.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-22_02-13-06
  done: false
  episode_len_mean: 258.26
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.826000000000093
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 3
  episodes_total: 2734
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023933550175723073
          cur_lr: 5.000000000000001e-05
          entropy: 1.2453126695421006
          entropy_coeff: 0.009999999999999998
          kl: 0.02000282028702986
          policy_loss: -0.06391925679312813
          total_loss: 1.0510623726579877
          vf_explained_var: 0.3601616621017456
          vf_loss: 1.1274299760659536
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,863,23246.3,863000,-25.826,-21.7,-29.3,258.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-22_02-13-33
  done: false
  episode_len_mean: 258.83
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.8830000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2738
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003590032526358462
          cur_lr: 5.000000000000001e-05
          entropy: 1.2359311593903435
          entropy_coeff: 0.009999999999999998
          kl: 0.009622581100493398
          policy_loss: -0.009812435259421666
          total_loss: 1.3116677933269076
          vf_explained_var: 0.23205457627773285
          vf_loss: 1.3338361064592996
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,864,23272.8,864000,-25.883,-21.7,-29.3,258.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-22_02-13-59
  done: false
  episode_len_mean: 259.5
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.950000000000095
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2742
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003590032526358462
          cur_lr: 5.000000000000001e-05
          entropy: 1.2260292450586954
          entropy_coeff: 0.009999999999999998
          kl: 0.010214506022783275
          policy_loss: 0.012653045025136736
          total_loss: 1.1862865057256486
          vf_explained_var: 0.3427780866622925
          vf_loss: 1.1858900864919026
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 865000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,865,23299.7,865000,-25.95,-21.7,-29.3,259.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-22_02-14-27
  done: false
  episode_len_mean: 260.1
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.010000000000094
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2746
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003590032526358462
          cur_lr: 5.000000000000001e-05
          entropy: 1.2211208264033
          entropy_coeff: 0.009999999999999998
          kl: 0.009028169600791575
          policy_loss: 0.02181075182225969
          total_loss: 1.3873516082763673
          vf_explained_var: 0.1475633978843689
          vf_loss: 1.377748837735918
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,866,23327,866000,-26.01,-21.7,-29.3,260.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-22_02-14-53
  done: false
  episode_len_mean: 260.07
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.007000000000097
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 3
  episodes_total: 2749
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003590032526358462
          cur_lr: 5.000000000000001e-05
          entropy: 1.232832670211792
          entropy_coeff: 0.009999999999999998
          kl: 0.02317708826840451
          policy_loss: -0.08676817557877964
          total_loss: 1.2909755322668288
          vf_explained_var: 0.061306171119213104
          vf_loss: 1.3900637083583407
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 867000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,867,23353.2,867000,-26.007,-21.7,-29.3,260.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-22_02-15-21
  done: false
  episode_len_mean: 260.24
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.024000000000097
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2753
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005385048789537692
          cur_lr: 5.000000000000001e-05
          entropy: 1.078588146633572
          entropy_coeff: 0.009999999999999998
          kl: 0.026684725664461516
          policy_loss: -0.04302594628598955
          total_loss: 1.1691781130101946
          vf_explained_var: 0.19000744819641113
          vf_loss: 1.2229755752616458
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 868000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,868,23381.1,868000,-26.024,-21.7,-29.3,260.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-22_02-15-49
  done: false
  episode_len_mean: 259.81
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.981000000000105
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2757
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008077573184306541
          cur_lr: 5.000000000000001e-05
          entropy: 1.1107466207610237
          entropy_coeff: 0.009999999999999998
          kl: 0.015699886803785843
          policy_loss: -0.04893633723258972
          total_loss: 1.0881902853647867
          vf_explained_var: 0.31266677379608154
          vf_loss: 1.1482214053471884
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,869,23409.4,869000,-25.981,-21.7,-29.3,259.81




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-22_02-16-37
  done: false
  episode_len_mean: 259.17
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.9170000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 5
  episodes_total: 2762
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008077573184306541
          cur_lr: 5.000000000000001e-05
          entropy: 1.0483857168091668
          entropy_coeff: 0.009999999999999998
          kl: 0.012962978613673215
          policy_loss: -0.0005354169342252943
          total_loss: 1.278286752435896
          vf_explained_var: 0.35682356357574463
          vf_loss: 1.2892955501874288
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 870000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,870,23456.9,870000,-25.917,-21.7,-29.3,259.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-22_02-17-08
  done: false
  episode_len_mean: 259.0
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.9000000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2766
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008077573184306541
          cur_lr: 5.000000000000001e-05
          entropy: 1.1981170588069492
          entropy_coeff: 0.009999999999999998
          kl: 0.02472417194285994
          policy_loss: 0.025674175553851656
          total_loss: 0.8454483330249787
          vf_explained_var: 0.526802659034729
          vf_loss: 0.8317353367805481
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 871000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,871,23488,871000,-25.9,-21.7,-29.3,259


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-22_02-17-37
  done: false
  episode_len_mean: 259.09
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.909000000000095
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2770
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012116359776459808
          cur_lr: 5.000000000000001e-05
          entropy: 1.1440918154186672
          entropy_coeff: 0.009999999999999998
          kl: 0.007652945777592259
          policy_loss: 0.011997423569361369
          total_loss: 0.6412759688165452
          vf_explained_var: 0.6977786421775818
          vf_loss: 0.6407101935810513
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,872,23517,872000,-25.909,-21.8,-29.3,259.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-22_02-18-06
  done: false
  episode_len_mean: 258.99
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.8990000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2774
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012116359776459808
          cur_lr: 5.000000000000001e-05
          entropy: 1.1451343496640523
          entropy_coeff: 0.009999999999999998
          kl: 0.01191495148568475
          policy_loss: 0.03805029648873541
          total_loss: 0.52913346009122
          vf_explained_var: 0.8250316977500916
          vf_loss: 0.5025200704733531
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,873,23545.6,873000,-25.899,-21.8,-29.3,258.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-22_02-18-34
  done: false
  episode_len_mean: 258.53
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.8530000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2778
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012116359776459808
          cur_lr: 5.000000000000001e-05
          entropy: 1.0105375925699869
          entropy_coeff: 0.009999999999999998
          kl: 0.0077068661343579
          policy_loss: 0.02252567062775294
          total_loss: 0.5197585841019948
          vf_explained_var: 0.8298171758651733
          vf_loss: 0.5073289586438073
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 874000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,874,23574.3,874000,-25.853,-21.8,-29.3,258.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-22_02-19-05
  done: false
  episode_len_mean: 257.79
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.7790000000001
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2782
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012116359776459808
          cur_lr: 5.000000000000001e-05
          entropy: 1.0571741249826219
          entropy_coeff: 0.009999999999999998
          kl: 0.018917502570093742
          policy_loss: 0.04699071952038341
          total_loss: 0.6494499418470595
          vf_explained_var: 0.7446594834327698
          vf_loss: 0.6130080428388384
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 875000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,875,23604.9,875000,-25.779,-21.8,-29.3,257.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-22_02-19-34
  done: false
  episode_len_mean: 257.06
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.706000000000095
  episode_reward_min: -29.300000000000146
  episodes_this_iter: 4
  episodes_total: 2786
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012116359776459808
          cur_lr: 5.000000000000001e-05
          entropy: 1.1314325948556265
          entropy_coeff: 0.009999999999999998
          kl: 0.062586200126973
          policy_loss: 0.03906963658001688
          total_loss: 0.5259194052881665
          vf_explained_var: 0.7850596308708191
          vf_loss: 0.4980882677767012
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 876000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,876,23634.3,876000,-25.706,-21.8,-29.3,257.06




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-22_02-20-16
  done: false
  episode_len_mean: 258.14
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -25.8140000000001
  episode_reward_min: -29.60000000000015
  episodes_this_iter: 4
  episodes_total: 2790
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018174539664689716
          cur_lr: 5.000000000000001e-05
          entropy: 1.2385310742590163
          entropy_coeff: 0.009999999999999998
          kl: 0.018048140891041776
          policy_loss: 0.025942060351371764
          total_loss: 1.1004429201285044
          vf_explained_var: 0.4699958264827728
          vf_loss: 1.0868533578183917
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,877,23676.4,877000,-25.814,-21.8,-29.6,258.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-22_02-20-39
  done: false
  episode_len_mean: 260.29
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.029000000000106
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 2793
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018174539664689716
          cur_lr: 5.000000000000001e-05
          entropy: 1.149972629547119
          entropy_coeff: 0.009999999999999998
          kl: 0.015138730900281968
          policy_loss: 0.039086932440598805
          total_loss: 0.8299000716871685
          vf_explained_var: 0.4938690960407257
          vf_loss: 0.8022853596342935
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 878000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,878,23698.7,878000,-26.029,-21.8,-32.7,260.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-22_02-21-04
  done: false
  episode_len_mean: 261.39
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.139000000000106
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 2796
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018174539664689716
          cur_lr: 5.000000000000001e-05
          entropy: 1.2795098026593525
          entropy_coeff: 0.009999999999999998
          kl: 0.024075690723802858
          policy_loss: 0.05220838321579827
          total_loss: 0.7800615390141805
          vf_explained_var: 0.3572159707546234
          vf_loss: 0.7406045099099478
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 879000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,879,23723.6,879000,-26.139,-21.8,-32.7,261.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-22_02-21-29
  done: false
  episode_len_mean: 262.61
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.2610000000001
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 2799
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0027261809497034564
          cur_lr: 5.000000000000001e-05
          entropy: 1.2467216412226358
          entropy_coeff: 0.009999999999999998
          kl: 0.029609825802416938
          policy_loss: -0.10682968397935232
          total_loss: 1.2241012679206
          vf_explained_var: 0.035884127020835876
          vf_loss: 1.3433174596892463
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 880000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,880,23748.7,880000,-26.261,-21.8,-32.7,262.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-22_02-21-53
  done: false
  episode_len_mean: 264.37
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.43700000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2803
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004089271424555187
          cur_lr: 5.000000000000001e-05
          entropy: 1.1953543133205837
          entropy_coeff: 0.009999999999999998
          kl: 0.03207855730970266
          policy_loss: 0.02689324522184001
          total_loss: 1.2403660866949293
          vf_explained_var: 0.13277429342269897
          vf_loss: 1.225295184718238
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,881,23773.2,881000,-26.437,-21.8,-32.7,264.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-22_02-22-21
  done: false
  episode_len_mean: 264.5
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.45000000000011
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2807
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006133907136832781
          cur_lr: 5.000000000000001e-05
          entropy: 1.2239833063549466
          entropy_coeff: 0.009999999999999998
          kl: 0.04593190158268688
          policy_loss: 0.007934219141801199
          total_loss: 1.1695311572816638
          vf_explained_var: 0.17614410817623138
          vf_loss: 1.173555024464925
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,882,23801.1,882000,-26.45,-21.8,-32.7,264.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-22_02-22-50
  done: false
  episode_len_mean: 263.59
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.359000000000105
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2811
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.204476805528005
          entropy_coeff: 0.009999999999999998
          kl: 0.01378749312907838
          policy_loss: 0.003616497251722548
          total_loss: 1.312039307753245
          vf_explained_var: 0.04485442861914635
          vf_loss: 1.320340707567003
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,883,23830.2,883000,-26.359,-21.8,-32.7,263.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-22_02-23-21
  done: false
  episode_len_mean: 262.36
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.236000000000107
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2815
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1135781897438897
          entropy_coeff: 0.009999999999999998
          kl: 0.006851681158490979
          policy_loss: 0.019768307606379192
          total_loss: 1.3307909422450594
          vf_explained_var: 0.052141301333904266
          vf_loss: 1.322095384862688
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,884,23860.9,884000,-26.236,-21.8,-32.7,262.36




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-22_02-24-08
  done: false
  episode_len_mean: 259.87
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.987000000000098
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2820
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1204081906212702
          entropy_coeff: 0.009999999999999998
          kl: 0.00982648131306413
          policy_loss: 0.06559387213653989
          total_loss: 1.3398731655544704
          vf_explained_var: 0.07618849724531174
          vf_loss: 1.285392955938975
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 885000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,885,23907.4,885000,-25.987,-19.6,-32.7,259.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-22_02-24-40
  done: false
  episode_len_mean: 258.53
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.853000000000097
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2824
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.0885973109139337
          entropy_coeff: 0.009999999999999998
          kl: 0.011047202141536002
          policy_loss: 0.02715381036202113
          total_loss: 1.4381278528107537
          vf_explained_var: 0.024786408990621567
          vf_loss: 1.4217583749029372
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,886,23940,886000,-25.853,-19.6,-32.7,258.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-22_02-25-10
  done: false
  episode_len_mean: 257.17
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.7170000000001
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2828
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.0386271125740476
          entropy_coeff: 0.009999999999999998
          kl: 0.012427214234627273
          policy_loss: -0.015065187008844481
          total_loss: 1.441263688935174
          vf_explained_var: 0.04513215646147728
          vf_loss: 1.4666007982359992
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,887,23970.3,887000,-25.717,-19.6,-32.7,257.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-22_02-25-42
  done: false
  episode_len_mean: 254.79
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.47900000000009
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2833
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.0820303161938984
          entropy_coeff: 0.009999999999999998
          kl: 0.012709903961265607
          policy_loss: -0.02226100116968155
          total_loss: 1.7994039151403638
          vf_explained_var: 0.051499828696250916
          vf_loss: 1.832368262608846
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 888000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,888,24001.3,888000,-25.479,-19.6,-32.7,254.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-22_02-26-12
  done: false
  episode_len_mean: 252.86
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.286000000000087
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2837
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1656808535257974
          entropy_coeff: 0.009999999999999998
          kl: 0.016577124606822657
          policy_loss: 0.02046565848092238
          total_loss: 1.3964236087269253
          vf_explained_var: 0.031501058489084244
          vf_loss: 1.3874622411198085
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 889000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,889,24031.4,889000,-25.286,-19.6,-32.7,252.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-22_02-26-41
  done: false
  episode_len_mean: 252.19
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.219000000000086
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2841
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2158813304371303
          entropy_coeff: 0.009999999999999998
          kl: 0.015350701120805275
          policy_loss: 0.03527471100290616
          total_loss: 1.302020329899258
          vf_explained_var: 0.07460512220859528
          vf_loss: 1.2787631842825147
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,890,24060.3,890000,-25.219,-19.6,-32.7,252.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-22_02-27-11
  done: false
  episode_len_mean: 251.04
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.10400000000009
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2845
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1770859122276307
          entropy_coeff: 0.009999999999999998
          kl: 0.009128966401724309
          policy_loss: -0.052383084098498026
          total_loss: 1.3673475411203173
          vf_explained_var: 0.031143521890044212
          vf_loss: 1.4314175049463909
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,891,24090.8,891000,-25.104,-19.6,-32.7,251.04




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-22_02-28-00
  done: false
  episode_len_mean: 248.61
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.861000000000082
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2850
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1700931813981799
          entropy_coeff: 0.009999999999999998
          kl: 0.005098098487001026
          policy_loss: -0.002291638238562478
          total_loss: 1.7175567057397632
          vf_explained_var: 0.04976064711809158
          vf_loss: 1.7315023687150743
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,892,24139.8,892000,-24.861,-19.6,-32.7,248.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-22_02-28-31
  done: false
  episode_len_mean: 247.91
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.791000000000082
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2854
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00920086070524917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2673552658822802
          entropy_coeff: 0.009999999999999998
          kl: 0.035239613360269886
          policy_loss: 0.00576486372285419
          total_loss: 1.3585318638218773
          vf_explained_var: 0.09850341081619263
          vf_loss: 1.3651163140932718
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 893000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,893,24170.6,893000,-24.791,-19.6,-32.7,247.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-22_02-29-02
  done: false
  episode_len_mean: 247.02
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.702000000000083
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2859
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.1474210169580248
          entropy_coeff: 0.009999999999999998
          kl: 0.01076147436908741
          policy_loss: -0.022745416892899408
          total_loss: 1.8199569781621296
          vf_explained_var: 0.022208765149116516
          vf_loss: 1.8540280805693732
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,894,24201.2,894000,-24.702,-19.6,-32.7,247.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-22_02-29-33
  done: false
  episode_len_mean: 246.83
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.683000000000085
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2863
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.2188363022274442
          entropy_coeff: 0.009999999999999998
          kl: 0.013788265894757186
          policy_loss: 0.015378685212797588
          total_loss: 1.3824218697018094
          vf_explained_var: 0.0286225788295269
          vf_loss: 1.3790412664413452
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,895,24232.3,895000,-24.683,-19.6,-32.7,246.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-22_02-30-03
  done: false
  episode_len_mean: 246.42
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.642000000000085
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2867
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.1726149227884082
          entropy_coeff: 0.009999999999999998
          kl: 0.00943077665199136
          policy_loss: -0.01615112531516287
          total_loss: 1.3846430778503418
          vf_explained_var: 0.03093118593096733
          vf_loss: 1.412390214867062
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,896,24262.6,896000,-24.642,-19.6,-32.7,246.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-22_02-30-34
  done: false
  episode_len_mean: 245.31
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.53100000000008
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2872
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.149432114760081
          entropy_coeff: 0.009999999999999998
          kl: 0.007186419174374868
          policy_loss: -0.015166438205374612
          total_loss: 1.8032726579242282
          vf_explained_var: 0.05727095156908035
          vf_loss: 1.8298342029253643
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,897,24293.4,897000,-24.531,-19.6,-32.7,245.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-22_02-31-05
  done: false
  episode_len_mean: 244.57
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.45700000000008
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2876
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.1375933011372885
          entropy_coeff: 0.009999999999999998
          kl: 0.013899218697289939
          policy_loss: 0.015250640776422289
          total_loss: 1.3987800863054063
          vf_explained_var: 0.06144251301884651
          vf_loss: 1.394713540871938
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 898000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,898,24325.1,898000,-24.457,-19.6,-32.7,244.57




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-22_02-31-57
  done: false
  episode_len_mean: 243.21
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -24.321000000000073
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2881
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.131120428774092
          entropy_coeff: 0.009999999999999998
          kl: 0.00852031295811331
          policy_loss: -0.01738277276357015
          total_loss: 1.7628383172882929
          vf_explained_var: 0.09539022296667099
          vf_loss: 1.7914147231313917
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,899,24376.2,899000,-24.321,-19,-32.7,243.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-22_02-32-28
  done: false
  episode_len_mean: 242.43
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -24.243000000000073
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 2885
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.125758461157481
          entropy_coeff: 0.009999999999999998
          kl: 0.00798559786616377
          policy_loss: 0.03996645758549373
          total_loss: 1.2064546373155383
          vf_explained_var: 0.07734543830156326
          vf_loss: 1.1776355445384978
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,900,24407.7,900000,-24.243,-19,-32.7,242.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-22_02-33-01
  done: false
  episode_len_mean: 239.92
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.992000000000065
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 2890
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.0439882304933337
          entropy_coeff: 0.009999999999999998
          kl: 0.008218962838950322
          policy_loss: -0.01855236382948028
          total_loss: 1.7545610030492147
          vf_explained_var: 0.07266898453235626
          vf_loss: 1.783439830938975
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 901000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,901,24440.4,901000,-23.992,-19,-32.7,239.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-22_02-33-32
  done: false
  episode_len_mean: 236.29
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.629000000000065
  episode_reward_min: -30.400000000000162
  episodes_this_iter: 4
  episodes_total: 2894
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013801291057873755
          cur_lr: 5.000000000000001e-05
          entropy: 1.0099501285288068
          entropy_coeff: 0.009999999999999998
          kl: 0.004440106332433895
          policy_loss: 0.03311909619304869
          total_loss: 1.4409344540701972
          vf_explained_var: 0.0635492280125618
          vf_loss: 1.4178535726335313
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,902,24471.6,902000,-23.629,-19,-30.4,236.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-22_02-34-03
  done: false
  episode_len_mean: 233.35
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.335000000000054
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 4
  episodes_total: 2898
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.0871533513069154
          entropy_coeff: 0.009999999999999998
          kl: 0.006061768904590142
          policy_loss: -0.045635642690791026
          total_loss: 1.4040044373936122
          vf_explained_var: 0.058274734765291214
          vf_loss: 1.460469787650638
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,903,24502.9,903000,-23.335,-19,-30.3,233.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-22_02-34-35
  done: false
  episode_len_mean: 229.82
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.982000000000063
  episode_reward_min: -26.400000000000105
  episodes_this_iter: 5
  episodes_total: 2903
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.0654787613285912
          entropy_coeff: 0.009999999999999998
          kl: 0.00985679841444696
          policy_loss: -0.002650474425819185
          total_loss: 1.3220460143354205
          vf_explained_var: 0.29552161693573
          vf_loss: 1.3352832562393613
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,904,24534.8,904000,-22.982,-19,-26.4,229.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-22_02-35-07
  done: false
  episode_len_mean: 228.1
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.810000000000045
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 4
  episodes_total: 2907
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.0656474457846747
          entropy_coeff: 0.009999999999999998
          kl: 0.012380441298363583
          policy_loss: -0.06451308553417524
          total_loss: 0.9290924383534326
          vf_explained_var: 0.27982446551322937
          vf_loss: 1.0041765683227115
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,905,24566.5,905000,-22.81,-19,-26.3,228.1




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-22_02-35-56
  done: false
  episode_len_mean: 227.07
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.707000000000047
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 5
  episodes_total: 2912
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.0537158244185978
          entropy_coeff: 0.009999999999999998
          kl: 0.010717381084190375
          policy_loss: 0.01605062000453472
          total_loss: 1.0530468245347342
          vf_explained_var: 0.3701708912849426
          vf_loss: 1.0474593960576588
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,906,24615.8,906000,-22.707,-19,-26.3,227.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-22_02-36-27
  done: false
  episode_len_mean: 226.71
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.67100000000006
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 5
  episodes_total: 2917
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.1231515460544161
          entropy_coeff: 0.009999999999999998
          kl: 0.009486108734782226
          policy_loss: 0.01578649663262897
          total_loss: 0.8401908020178477
          vf_explained_var: 0.4897659718990326
          vf_loss: 0.83557035724322
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,907,24646.5,907000,-22.671,-19,-26.3,226.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-22_02-36-59
  done: false
  episode_len_mean: 226.74
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.674000000000046
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 4
  episodes_total: 2921
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.1546019342210558
          entropy_coeff: 0.009999999999999998
          kl: 0.010050570611712493
          policy_loss: 0.0027092030478848353
          total_loss: 0.7373761369122399
          vf_explained_var: 0.46755850315093994
          vf_loss: 0.7461435986889733
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,908,24678,908000,-22.674,-19,-26.3,226.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-22_02-37-30
  done: false
  episode_len_mean: 226.6
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.66000000000005
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 4
  episodes_total: 2925
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.2447211292054918
          entropy_coeff: 0.009999999999999998
          kl: 0.012188814782071787
          policy_loss: -0.03225061214632458
          total_loss: 0.6984625269969305
          vf_explained_var: 0.45092663168907166
          vf_loss: 0.7430762380361557
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 909000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,909,24709,909000,-22.66,-19,-26.3,226.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-22_02-38-01
  done: false
  episode_len_mean: 226.81
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.681000000000058
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 5
  episodes_total: 2930
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.2786743230289883
          entropy_coeff: 0.009999999999999998
          kl: 0.011891621851277515
          policy_loss: 0.009784665745165613
          total_loss: 0.6034757680363125
          vf_explained_var: 0.6718790531158447
          vf_loss: 0.6063957793845071
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,910,24739.9,910000,-22.681,-19,-26.3,226.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-22_02-38-30
  done: false
  episode_len_mean: 227.22
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.722000000000058
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 4
  episodes_total: 2934
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006900645528936877
          cur_lr: 5.000000000000001e-05
          entropy: 1.3548756029870774
          entropy_coeff: 0.009999999999999998
          kl: 0.028822753999783242
          policy_loss: 0.01162386222018136
          total_loss: 0.50293650544352
          vf_explained_var: 0.6182818412780762
          vf_loss: 0.5046625018119812
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,911,24769.2,911000,-22.722,-19,-26.3,227.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-22_02-39-00
  done: false
  episode_len_mean: 227.52
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.752000000000056
  episode_reward_min: -26.300000000000104
  episodes_this_iter: 4
  episodes_total: 2938
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010350968293405316
          cur_lr: 5.000000000000001e-05
          entropy: 1.3337324023246766
          entropy_coeff: 0.009999999999999998
          kl: 0.04545269061306349
          policy_loss: 0.02194432740410169
          total_loss: 0.3732961556977696
          vf_explained_var: 0.7455543279647827
          vf_loss: 0.3642186727788713
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 912000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,912,24799,912000,-22.752,-19,-26.3,227.52




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-22_02-39-47
  done: false
  episode_len_mean: 227.12
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.71200000000005
  episode_reward_min: -25.900000000000098
  episodes_this_iter: 4
  episodes_total: 2942
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015526452440107974
          cur_lr: 5.000000000000001e-05
          entropy: 1.3580954313278197
          entropy_coeff: 0.009999999999999998
          kl: 0.02844116531344165
          policy_loss: -0.010396418554915323
          total_loss: 0.3360411097606023
          vf_explained_var: 0.7160120606422424
          vf_loss: 0.3595768940117624
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 913000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,913,24846.6,913000,-22.712,-19,-25.9,227.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-22_02-40-17
  done: false
  episode_len_mean: 227.65
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.765000000000057
  episode_reward_min: -25.900000000000098
  episodes_this_iter: 4
  episodes_total: 2946
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02328967866016196
          cur_lr: 5.000000000000001e-05
          entropy: 1.4109629405869377
          entropy_coeff: 0.009999999999999998
          kl: 0.014115707198056448
          policy_loss: -0.06976796388626098
          total_loss: 0.38359717826048534
          vf_explained_var: 0.5558303594589233
          vf_loss: 0.4671460251013438
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,914,24876.1,914000,-22.765,-19,-25.9,227.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-22_02-40-45
  done: false
  episode_len_mean: 228.59
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.859000000000055
  episode_reward_min: -25.900000000000098
  episodes_this_iter: 4
  episodes_total: 2950
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02328967866016196
          cur_lr: 5.000000000000001e-05
          entropy: 1.4922642866770426
          entropy_coeff: 0.009999999999999998
          kl: 0.035722213196056965
          policy_loss: -0.08400608226656914
          total_loss: 0.4450269242127736
          vf_explained_var: 0.5307880640029907
          vf_loss: 0.5431236859824923
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,915,24904.7,915000,-22.859,-19,-25.9,228.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-22_02-41-15
  done: false
  episode_len_mean: 229.06
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.906000000000063
  episode_reward_min: -25.40000000000009
  episodes_this_iter: 4
  episodes_total: 2954
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.4641570130983987
          entropy_coeff: 0.009999999999999998
          kl: 0.01226573123422033
          policy_loss: -0.09657320007681847
          total_loss: 0.5410754634274377
          vf_explained_var: 0.5745249390602112
          vf_loss: 0.6518617391586303
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,916,24933.7,916000,-22.906,-19,-25.4,229.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-22_02-41-43
  done: false
  episode_len_mean: 229.99
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -22.99900000000006
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2958
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.4247512155108981
          entropy_coeff: 0.009999999999999998
          kl: 0.008913245595187726
          policy_loss: -0.11436914992001322
          total_loss: 0.5393897705607944
          vf_explained_var: 0.5786973237991333
          vf_loss: 0.6676950501071082
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,917,24962.1,917000,-22.999,-19,-25.7,229.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-22_02-42-11
  done: false
  episode_len_mean: 230.84
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.084000000000056
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2962
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.3536129077275594
          entropy_coeff: 0.009999999999999998
          kl: 0.014103467836991177
          policy_loss: -0.1405384393615855
          total_loss: 0.4677959508366055
          vf_explained_var: 0.6899142265319824
          vf_loss: 0.6213778297106425
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,918,24990.4,918000,-23.084,-19,-25.7,230.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-22_02-42-40
  done: false
  episode_len_mean: 231.85
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.185000000000056
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2966
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.2651468435923259
          entropy_coeff: 0.009999999999999998
          kl: 0.012224007945598933
          policy_loss: -0.05975888292822573
          total_loss: 0.4069404161638684
          vf_explained_var: 0.7467930912971497
          vf_loss: 0.47892373038662805
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 919000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,919,25018.7,919000,-23.185,-19,-25.7,231.85




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-22_02-43-26
  done: false
  episode_len_mean: 232.58
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.258000000000052
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 5
  episodes_total: 2971
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.216294334994422
          entropy_coeff: 0.009999999999999998
          kl: 0.011655971179173245
          policy_loss: -0.029036928464969
          total_loss: 0.5978476391898261
          vf_explained_var: 0.729735255241394
          vf_loss: 0.638640312022633
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,920,25065.2,920000,-23.258,-19,-25.7,232.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-22_02-43-55
  done: false
  episode_len_mean: 233.52
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.352000000000068
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2975
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.2760489900906882
          entropy_coeff: 0.009999999999999998
          kl: 0.015471827568424128
          policy_loss: -0.0800165346927113
          total_loss: 0.37069073451889883
          vf_explained_var: 0.7813174724578857
          vf_loss: 0.4629272596703635
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,921,25094,921000,-23.352,-19,-25.7,233.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-22_02-44-23
  done: false
  episode_len_mean: 234.2
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.42000000000006
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2979
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.218086462550693
          entropy_coeff: 0.009999999999999998
          kl: 0.01146759043696098
          policy_loss: -0.040828562445110746
          total_loss: 0.4042916917138629
          vf_explained_var: 0.7951962947845459
          vf_loss: 0.4569005032380422
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,922,25122.3,922000,-23.42,-19,-25.7,234.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-22_02-44-52
  done: false
  episode_len_mean: 235.38
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -23.538000000000064
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2983
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03493451799024294
          cur_lr: 5.000000000000001e-05
          entropy: 1.162435966067844
          entropy_coeff: 0.009999999999999998
          kl: 0.020107853717348058
          policy_loss: 0.00977416518661711
          total_loss: 0.4073399381712079
          vf_explained_var: 0.833829402923584
          vf_loss: 0.40848768171336913
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,923,25151.1,923000,-23.538,-19.6,-25.7,235.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-22_02-45-21
  done: false
  episode_len_mean: 236.16
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -23.616000000000067
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2987
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05240177698536442
          cur_lr: 5.000000000000001e-05
          entropy: 0.9996215211020576
          entropy_coeff: 0.009999999999999998
          kl: 0.009653948256252438
          policy_loss: 0.033395579291714564
          total_loss: 0.3428451473514239
          vf_explained_var: 0.8622414469718933
          vf_loss: 0.3189398967557483
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 924000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,924,25180.1,924000,-23.616,-19.6,-25.7,236.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-22_02-45-50
  done: false
  episode_len_mean: 236.84
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -23.68400000000007
  episode_reward_min: -25.700000000000095
  episodes_this_iter: 4
  episodes_total: 2991
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05240177698536442
          cur_lr: 5.000000000000001e-05
          entropy: 1.0037511573897469
          entropy_coeff: 0.009999999999999998
          kl: 0.015153583329210818
          policy_loss: -0.015525478952460819
          total_loss: 0.3721237723198202
          vf_explained_var: 0.7932752966880798
          vf_loss: 0.3968926876783371
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,925,25209,925000,-23.684,-19.6,-25.7,236.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-22_02-46-19
  done: false
  episode_len_mean: 238.05
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -23.805000000000064
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 2995
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05240177698536442
          cur_lr: 5.000000000000001e-05
          entropy: 0.9808518363369836
          entropy_coeff: 0.009999999999999998
          kl: 0.022333831756972383
          policy_loss: -0.013686610923873053
          total_loss: 0.48357258091370264
          vf_explained_var: 0.7239968776702881
          vf_loss: 0.5058973756101396
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,926,25237.6,926000,-23.805,-19.6,-28,238.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-22_02-46-45
  done: false
  episode_len_mean: 239.9
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -23.99000000000007
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 2999
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 1.0439867145485349
          entropy_coeff: 0.009999999999999998
          kl: 0.019295059678941583
          policy_loss: -0.014544264309936099
          total_loss: 0.8238536722130245
          vf_explained_var: 0.41903001070022583
          vf_loss: 0.847321155336168
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,927,25264.3,927000,-23.99,-19.6,-28,239.9




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-22_02-47-32
  done: false
  episode_len_mean: 240.55
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.055000000000074
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 3
  episodes_total: 3002
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.933833474583096
          entropy_coeff: 0.009999999999999998
          kl: 0.04043838634564736
          policy_loss: -0.09643568247556686
          total_loss: 0.5968798372480605
          vf_explained_var: 0.48661887645721436
          vf_loss: 0.6994752844174703
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,928,25311.3,928000,-24.055,-19.6,-28,240.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-22_02-48-00
  done: false
  episode_len_mean: 242.22
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -24.222000000000072
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3006
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11790399821706997
          cur_lr: 5.000000000000001e-05
          entropy: 0.8606521434254116
          entropy_coeff: 0.009999999999999998
          kl: 0.0068328808627815835
          policy_loss: -0.010313646992047627
          total_loss: 0.7060728626118766
          vf_explained_var: 0.5357304811477661
          vf_loss: 0.7241874009370803
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,929,25339.1,929000,-24.222,-19.6,-28,242.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-22_02-48-28
  done: false
  episode_len_mean: 243.71
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.371000000000077
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3010
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11790399821706997
          cur_lr: 5.000000000000001e-05
          entropy: 0.7906849066416423
          entropy_coeff: 0.009999999999999998
          kl: 0.004908652347826129
          policy_loss: 0.0010935618645615047
          total_loss: 0.6929175840483771
          vf_explained_var: 0.553295373916626
          vf_loss: 0.699152119954427
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,930,25367.3,930000,-24.371,-21.1,-28,243.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-22_02-48-58
  done: false
  episode_len_mean: 244.52
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.45200000000008
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3014
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.780556552277671
          entropy_coeff: 0.009999999999999998
          kl: 0.005940621684657938
          policy_loss: -0.08152538273069594
          total_loss: 0.8830717285474141
          vf_explained_var: 0.4541076123714447
          vf_loss: 0.9720524609088897
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,931,25396.6,931000,-24.452,-21.1,-28,244.52


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-22_02-49-26
  done: false
  episode_len_mean: 245.57
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.557000000000077
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 5
  episodes_total: 3019
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.818011446793874
          entropy_coeff: 0.009999999999999998
          kl: 0.011349709068792575
          policy_loss: -0.03636229145858023
          total_loss: 0.8483808232678307
          vf_explained_var: 0.6198115944862366
          vf_loss: 0.892254149251514
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,932,25425.3,932000,-24.557,-21.1,-28,245.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-10-22_02-49-55
  done: false
  episode_len_mean: 246.31
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.631000000000082
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3023
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.826243535677592
          entropy_coeff: 0.009999999999999998
          kl: 0.004415453128218555
          policy_loss: -0.02001241942246755
          total_loss: 0.811959183216095
          vf_explained_var: 0.5515410304069519
          vf_loss: 0.839973732497957
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 933000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,933,25454.3,933000,-24.631,-21.1,-28,246.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-10-22_02-50-26
  done: false
  episode_len_mean: 246.92
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.692000000000085
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3027
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029475999554267492
          cur_lr: 5.000000000000001e-05
          entropy: 0.8941228846708934
          entropy_coeff: 0.009999999999999998
          kl: 0.012507599133526526
          policy_loss: -0.004953980114724901
          total_loss: 0.7206627640459272
          vf_explained_var: 0.6142159104347229
          vf_loss: 0.734189299080107
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 934000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,934,25484.6,934000,-24.692,-21.1,-28,246.92




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-10-22_02-51-12
  done: false
  episode_len_mean: 247.23
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.723000000000088
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 3031
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029475999554267492
          cur_lr: 5.000000000000001e-05
          entropy: 0.9161934541331397
          entropy_coeff: 0.009999999999999998
          kl: 0.022725787563794283
          policy_loss: 0.0008982859551906585
          total_loss: 0.6488459540738
          vf_explained_var: 0.6614317893981934
          vf_loss: 0.6564397242334153
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 935000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,935,25530.9,935000,-24.723,-21.1,-28,247.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-10-22_02-51-39
  done: false
  episode_len_mean: 248.48
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.848000000000088
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3035
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04421399933140124
          cur_lr: 5.000000000000001e-05
          entropy: 0.7108101371261809
          entropy_coeff: 0.009999999999999998
          kl: 0.0638746076208913
          policy_loss: 0.03275815039459202
          total_loss: 1.0063746372858684
          vf_explained_var: 0.48239386081695557
          vf_loss: 0.9779004216194153
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 936000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,936,25558.2,936000,-24.848,-21.1,-31.1,248.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-10-22_02-52-08
  done: false
  episode_len_mean: 248.71
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.871000000000084
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 3
  episodes_total: 3038
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06632099899710187
          cur_lr: 5.000000000000001e-05
          entropy: 0.7934044612778558
          entropy_coeff: 0.009999999999999998
          kl: 0.01859145989241442
          policy_loss: -0.11304306478963957
          total_loss: 0.6408140930864547
          vf_explained_var: 0.5522533059120178
          vf_loss: 0.7605581886238522
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 937000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,937,25587.1,937000,-24.871,-21.1,-31.1,248.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-10-22_02-52-35
  done: false
  episode_len_mean: 250.1
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.01000000000009
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3042
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06632099899710187
          cur_lr: 5.000000000000001e-05
          entropy: 0.7504537297619713
          entropy_coeff: 0.009999999999999998
          kl: 0.03730109133312741
          policy_loss: 0.006385450189312299
          total_loss: 0.8666520886951022
          vf_explained_var: 0.45195960998535156
          vf_loss: 0.8652973393599193
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 938000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,938,25614.2,938000,-25.01,-21.7,-31.1,250.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-10-22_02-53-04
  done: false
  episode_len_mean: 250.4
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.040000000000088
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3046
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09948149849565277
          cur_lr: 5.000000000000001e-05
          entropy: 0.7596406512790256
          entropy_coeff: 0.009999999999999998
          kl: 0.02060395297583363
          policy_loss: -0.0011894514163335165
          total_loss: 0.8276791678534614
          vf_explained_var: 0.39602774381637573
          vf_loss: 0.8344153258535597
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 939000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,939,25643.2,939000,-25.04,-21.7,-31.1,250.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-10-22_02-53-32
  done: false
  episode_len_mean: 250.68
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -25.068000000000083
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3050
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.7201225386725532
          entropy_coeff: 0.009999999999999998
          kl: 0.0057499683321572724
          policy_loss: 0.01640131183796459
          total_loss: 0.7670131696595086
          vf_explained_var: 0.5511376261711121
          vf_loss: 0.7569550540712144
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained: 940000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,940,25670.3,940000,-25.068,-21.7,-31.1,250.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-10-22_02-54-02
  done: false
  episode_len_mean: 250.34
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.034000000000084
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3054
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.6517158110936483
          entropy_coeff: 0.009999999999999998
          kl: 0.005226306459299539
          policy_loss: 0.00785324631465806
          total_loss: 0.8884488238228692
          vf_explained_var: 0.3443608283996582
          vf_loss: 0.8863328470124139
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,941,25700.4,941000,-25.034,-21.6,-31.1,250.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-10-22_02-54-30
  done: false
  episode_len_mean: 250.23
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.023000000000085
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3058
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.5990376359886593
          entropy_coeff: 0.009999999999999998
          kl: 0.003948677326599102
          policy_loss: -0.026787397927708095
          total_loss: 0.8181188669469621
          vf_explained_var: 0.5205975770950317
          vf_loss: 0.8503074036704169
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 942000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,942,25728.3,942000,-25.023,-21.6,-31.1,250.23




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-10-22_02-55-16
  done: false
  episode_len_mean: 249.96
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.996000000000084
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3062
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.6507592419783275
          entropy_coeff: 0.009999999999999998
          kl: 0.00886873636179691
          policy_loss: -0.12119979403085179
          total_loss: 0.9894528163803948
          vf_explained_var: 0.5306757688522339
          vf_loss: 1.1164985120296478
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,943,25775,943000,-24.996,-21.6,-31.1,249.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-10-22_02-55-45
  done: false
  episode_len_mean: 249.81
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.981000000000087
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3066
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.6556307507885827
          entropy_coeff: 0.009999999999999998
          kl: 0.010812262333788345
          policy_loss: -0.10601315225164096
          total_loss: 0.8575284659862519
          vf_explained_var: 0.5518065094947815
          vf_loss: 0.9692912101745605
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 944000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,944,25803.9,944000,-24.981,-21.6,-31.1,249.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-10-22_02-56-14
  done: false
  episode_len_mean: 249.89
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.989000000000093
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 5
  episodes_total: 3071
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.5990947518083785
          entropy_coeff: 0.009999999999999998
          kl: 0.004076401165489566
          policy_loss: -0.0023522566590044236
          total_loss: 1.0080592321025001
          vf_explained_var: 0.5014845728874207
          vf_loss: 1.0160982900195652
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,945,25832.2,945000,-24.989,-21.6,-31.1,249.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-10-22_02-56-43
  done: false
  episode_len_mean: 249.47
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.947000000000088
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3075
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.6123130824830797
          entropy_coeff: 0.009999999999999998
          kl: 0.004359047482930438
          policy_loss: 0.0045308506323231594
          total_loss: 0.9628029386202495
          vf_explained_var: 0.3981083035469055
          vf_loss: 0.9642325964238908
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 946000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,946,25861.7,946000,-24.947,-21.6,-31.1,249.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-10-22_02-57-12
  done: false
  episode_len_mean: 249.62
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.962000000000085
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3079
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01865278096793489
          cur_lr: 5.000000000000001e-05
          entropy: 0.7229129758146075
          entropy_coeff: 0.009999999999999998
          kl: 0.026750014107319624
          policy_loss: 0.012811519122785992
          total_loss: 0.7955714484055837
          vf_explained_var: 0.5577566623687744
          vf_loss: 0.7894900951120588
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 947000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,947,25890.1,947000,-24.962,-21.6,-31.1,249.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-10-22_02-57-41
  done: false
  episode_len_mean: 249.71
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.971000000000085
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3083
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027979171451902336
          cur_lr: 5.000000000000001e-05
          entropy: 0.6213251074155172
          entropy_coeff: 0.009999999999999998
          kl: 0.005508757631190204
          policy_loss: 0.04260530008210076
          total_loss: 0.8292514748043485
          vf_explained_var: 0.5969849824905396
          vf_loss: 0.7927053107155694
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 948000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,948,25919.2,948000,-24.971,-21.6,-31.1,249.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-10-22_02-58-10
  done: false
  episode_len_mean: 249.54
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.954000000000082
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3087
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027979171451902336
          cur_lr: 5.000000000000001e-05
          entropy: 0.568679079413414
          entropy_coeff: 0.009999999999999998
          kl: 0.0069696985249302025
          policy_loss: 0.056875482781065835
          total_loss: 1.124056675699022
          vf_explained_var: 0.19525201618671417
          vf_loss: 1.0726729849974315
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 949000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,949,25948.3,949000,-24.954,-21.6,-31.1,249.54




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-10-22_02-58-59
  done: false
  episode_len_mean: 248.84
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.88400000000008
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3091
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027979171451902336
          cur_lr: 5.000000000000001e-05
          entropy: 0.522081928451856
          entropy_coeff: 0.009999999999999998
          kl: 0.011702535614135732
          policy_loss: -0.02508199330833223
          total_loss: 0.8283705572287242
          vf_explained_var: 0.41226544976234436
          vf_loss: 0.8583459357420603
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 950000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,950,25997.5,950000,-24.884,-21.6,-31.1,248.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-10-22_02-59-29
  done: false
  episode_len_mean: 247.8
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.780000000000083
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 5
  episodes_total: 3096
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027979171451902336
          cur_lr: 5.000000000000001e-05
          entropy: 0.4322956873310937
          entropy_coeff: 0.009999999999999998
          kl: 0.0037403207586711082
          policy_loss: -0.007306107878684997
          total_loss: 1.1757646653387281
          vf_explained_var: 0.33422914147377014
          vf_loss: 1.1872890869776407
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 951000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,951,26027.3,951000,-24.78,-21.6,-31.1,247.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-10-22_03-00-00
  done: false
  episode_len_mean: 246.02
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.60200000000007
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3100
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013989585725951168
          cur_lr: 5.000000000000001e-05
          entropy: 0.5385758234394922
          entropy_coeff: 0.009999999999999998
          kl: 0.013064158302209547
          policy_loss: 0.0604948201113277
          total_loss: 0.9935955862204234
          vf_explained_var: 0.14312730729579926
          vf_loss: 0.9383037606875102
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 952000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,952,26058.9,952000,-24.602,-21.6,-31.1,246.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-10-22_03-00-26
  done: false
  episode_len_mean: 246.57
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.657000000000085
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 3104
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013989585725951168
          cur_lr: 5.000000000000001e-05
          entropy: 0.775564436117808
          entropy_coeff: 0.009999999999999998
          kl: 0.014982000827735457
          policy_loss: 0.01749655786487791
          total_loss: 1.2419575015703836
          vf_explained_var: 0.002095139119774103
          vf_loss: 1.2320069895850287
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,953,26084.2,953000,-24.657,-21.6,-31.1,246.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-10-22_03-00-51
  done: false
  episode_len_mean: 247.36
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.736000000000086
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 3
  episodes_total: 3107
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013989585725951168
          cur_lr: 5.000000000000001e-05
          entropy: 0.7390207403235965
          entropy_coeff: 0.009999999999999998
          kl: 0.004480695847047242
          policy_loss: -0.09211578071117402
          total_loss: 1.1590667963027954
          vf_explained_var: 0.044384583830833435
          vf_loss: 1.2585101061397128
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 954000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,954,26109.8,954000,-24.736,-21.6,-31.7,247.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-10-22_03-01-17
  done: false
  episode_len_mean: 248.61
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.86100000000009
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3111
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006994792862975584
          cur_lr: 5.000000000000001e-05
          entropy: 0.6009842809703615
          entropy_coeff: 0.009999999999999998
          kl: 0.029058258555414794
          policy_loss: 0.025564554292294712
          total_loss: 1.1842097891701593
          vf_explained_var: 0.16510023176670074
          vf_loss: 1.164451821645101
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 955000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,955,26135.8,955000,-24.861,-21.6,-31.7,248.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-10-22_03-01-49
  done: false
  episode_len_mean: 248.0
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -24.800000000000086
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3115
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.47547552519374425
          entropy_coeff: 0.009999999999999998
          kl: 0.005702700404948915
          policy_loss: -0.011967342843612035
          total_loss: 1.1539242539140913
          vf_explained_var: 0.20663867890834808
          vf_loss: 1.1705865171220569
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 956000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,956,26167.4,956000,-24.8,-21.6,-31.7,248




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-10-22_03-02-38
  done: false
  episode_len_mean: 246.38
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.638000000000076
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 5
  episodes_total: 3120
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.5095533874299791
          entropy_coeff: 0.009999999999999998
          kl: 0.010803393841832569
          policy_loss: -0.025811478081676695
          total_loss: 1.5038395788934495
          vf_explained_var: 0.1685103327035904
          vf_loss: 1.5346332457330492
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 957000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,957,26215.9,957000,-24.638,-19.2,-31.7,246.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-10-22_03-03-09
  done: false
  episode_len_mean: 246.12
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.612000000000084
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3124
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.5792566663689084
          entropy_coeff: 0.009999999999999998
          kl: 0.014770600425022136
          policy_loss: 0.009680323882235421
          total_loss: 1.2980000138282777
          vf_explained_var: 0.13266444206237793
          vf_loss: 1.2939572877354093
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 958000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,958,26247.2,958000,-24.612,-19.2,-31.7,246.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-10-22_03-03-37
  done: false
  episode_len_mean: 246.62
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.66200000000008
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3128
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.6364349901676178
          entropy_coeff: 0.009999999999999998
          kl: 0.00980690439180262
          policy_loss: 0.007850389265351825
          total_loss: 1.318612473540836
          vf_explained_var: 0.16179785132408142
          vf_loss: 1.3170235315958658
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained: 959000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,959,26275.3,959000,-24.662,-19.2,-31.7,246.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-10-22_03-04-08
  done: false
  episode_len_mean: 246.54
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.654000000000078
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3132
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.5993720706966188
          entropy_coeff: 0.009999999999999998
          kl: 0.019031609711427363
          policy_loss: 0.0014012859927283394
          total_loss: 1.2533980396058824
          vf_explained_var: 0.20400062203407288
          vf_loss: 1.257790790663825
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 960000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,960,26306.7,960000,-24.654,-19.2,-31.7,246.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-10-22_03-04-41
  done: false
  episode_len_mean: 244.27
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.42700000000008
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 5
  episodes_total: 3137
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.5373156199852626
          entropy_coeff: 0.009999999999999998
          kl: 0.011887793859684987
          policy_loss: -0.002074024412367079
          total_loss: 1.509653448396259
          vf_explained_var: 0.2773720920085907
          vf_loss: 1.5169758962260351
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_trained: 961000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,961,26338.9,961000,-24.427,-19.2,-31.7,244.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-10-22_03-05-13
  done: false
  episode_len_mean: 242.29
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.229000000000077
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3141
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010492189294463378
          cur_lr: 5.000000000000001e-05
          entropy: 0.4803024904595481
          entropy_coeff: 0.009999999999999998
          kl: 0.004056312511427033
          policy_loss: 0.007063490566280153
          total_loss: 1.0454894423484802
          vf_explained_var: 0.23718075454235077
          vf_loss: 1.0431864069567787
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 962000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,962,26370.9,962000,-24.229,-19.2,-31.7,242.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-10-22_03-05-43
  done: false
  episode_len_mean: 241.12
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.11200000000007
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 5
  episodes_total: 3146
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005246094647231689
          cur_lr: 5.000000000000001e-05
          entropy: 0.6066380957762401
          entropy_coeff: 0.009999999999999998
          kl: 0.03648389328799379
          policy_loss: 0.02956058399544822
          total_loss: 1.0159959673881531
          vf_explained_var: 0.5442217588424683
          vf_loss: 0.9923103657033708
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 963000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,963,26401.6,963000,-24.112,-19.2,-31.7,241.12




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-10-22_03-06-29
  done: false
  episode_len_mean: 240.19
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.019000000000073
  episode_reward_min: -31.70000000000018
  episodes_this_iter: 4
  episodes_total: 3150
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007869141970847534
          cur_lr: 5.000000000000001e-05
          entropy: 0.4340720213121838
          entropy_coeff: 0.009999999999999998
          kl: 0.007291917153771897
          policy_loss: 0.060217352790964976
          total_loss: 1.0336593515343135
          vf_explained_var: 0.3603641092777252
          vf_loss: 0.9777253362867567
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 964000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,964,26447.3,964000,-24.019,-19.2,-31.7,240.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-10-22_03-06-53
  done: false
  episode_len_mean: 242.34
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.23400000000008
  episode_reward_min: -33.0000000000002
  episodes_this_iter: 3
  episodes_total: 3153
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007869141970847534
          cur_lr: 5.000000000000001e-05
          entropy: 0.7158070756329431
          entropy_coeff: 0.009999999999999998
          kl: 0.006390713405078685
          policy_loss: -0.01498011847337087
          total_loss: 1.0914526104927063
          vf_explained_var: 0.121707983314991
          vf_loss: 1.1135405123233795
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained: 965000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,965,26471,965000,-24.234,-19.2,-33,242.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-10-22_03-07-13
  done: false
  episode_len_mean: 246.21
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.621000000000077
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 3156
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007869141970847534
          cur_lr: 5.000000000000001e-05
          entropy: 0.7082549095153808
          entropy_coeff: 0.009999999999999998
          kl: 0.021410816188348274
          policy_loss: 0.0383646541999446
          total_loss: 1.1693118976222143
          vf_explained_var: 0.0031452954281121492
          vf_loss: 1.1378612988524968
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 966000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,966,26491.1,966000,-24.621,-19.2,-41.6,246.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-10-22_03-07-40
  done: false
  episode_len_mean: 246.95
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.69500000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3160
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011803712956271303
          cur_lr: 5.000000000000001e-05
          entropy: 0.7166769411828783
          entropy_coeff: 0.009999999999999998
          kl: 0.02874330488920549
          policy_loss: 0.03106420578228103
          total_loss: 1.458896623717414
          vf_explained_var: 0.02727389894425869
          vf_loss: 1.43465991947386
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained: 967000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,967,26518.5,967000,-24.695,-19.2,-41.6,246.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-10-22_03-08-07
  done: false
  episode_len_mean: 247.6
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.760000000000076
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 3163
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017705569434406952
          cur_lr: 5.000000000000001e-05
          entropy: 0.6898912535773383
          entropy_coeff: 0.009999999999999998
          kl: 0.021923346599356186
          policy_loss: -0.07323230124182172
          total_loss: 1.3318193488650851
          vf_explained_var: 0.018656272441148758
          vf_loss: 1.4115623871485392
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 968000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,968,26544.8,968000,-24.76,-19.2,-41.6,247.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-10-22_03-08-30
  done: false
  episode_len_mean: 249.74
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.97400000000009
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3167
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.6706385413805643
          entropy_coeff: 0.009999999999999998
          kl: 0.008324549775326833
          policy_loss: 0.01612125759323438
          total_loss: 1.5209187189737956
          vf_explained_var: 0.08034312725067139
          vf_loss: 1.5112827645407783
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 969000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,969,26567.9,969000,-24.974,-19.2,-41.6,249.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-10-22_03-08-54
  done: false
  episode_len_mean: 251.57
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.157000000000092
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 3170
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.6626164926422967
          entropy_coeff: 0.009999999999999998
          kl: 0.007184597515565687
          policy_loss: 0.04576774752802319
          total_loss: 1.1473991089397007
          vf_explained_var: -0.08309926092624664
          vf_loss: 1.1080667201015684
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 970000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,970,26592.1,970000,-25.157,-19.2,-41.6,251.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-10-22_03-09-18
  done: false
  episode_len_mean: 253.64
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.364000000000093
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 3173
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.6145314660337237
          entropy_coeff: 0.009999999999999998
          kl: 0.012388626528230083
          policy_loss: 0.014902461568514507
          total_loss: 1.1318597654501596
          vf_explained_var: -0.24282009899616241
          vf_loss: 1.1227735933330325
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 971000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,971,26615.7,971000,-25.364,-19.2,-41.6,253.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-10-22_03-09-43
  done: false
  episode_len_mean: 255.41
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.541000000000096
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3177
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.5254125197728475
          entropy_coeff: 0.009999999999999998
          kl: 0.05213865799939804
          policy_loss: 0.007717281414402856
          total_loss: 1.5948019014464485
          vf_explained_var: 0.11898673325777054
          vf_loss: 1.5909540348582798
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 972000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,972,26640.6,972000,-25.541,-19.2,-41.6,255.41




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-10-22_03-10-31
  done: false
  episode_len_mean: 254.79
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.47900000000009
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3181
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03983753122741563
          cur_lr: 5.000000000000001e-05
          entropy: 0.4005065073569616
          entropy_coeff: 0.009999999999999998
          kl: 0.016097560579043425
          policy_loss: 0.025253274622890683
          total_loss: 1.547148612472746
          vf_explained_var: 0.0684281587600708
          vf_loss: 1.5252591186099582
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 973000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,973,26688.8,973000,-25.479,-19.2,-41.6,254.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-10-22_03-11-02
  done: false
  episode_len_mean: 253.98
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.39800000000009
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3185
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03983753122741563
          cur_lr: 5.000000000000001e-05
          entropy: 0.32048074420955447
          entropy_coeff: 0.009999999999999998
          kl: 0.01823974248067041
          policy_loss: -0.02915479342142741
          total_loss: 1.3955267979039085
          vf_explained_var: 0.10391674935817719
          vf_loss: 1.4271597729788885
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,974,26719.5,974000,-25.398,-19.2,-41.6,253.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-10-22_03-11-33
  done: false
  episode_len_mean: 253.41
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.34100000000009
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3190
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03983753122741563
          cur_lr: 5.000000000000001e-05
          entropy: 0.24759107364548577
          entropy_coeff: 0.009999999999999998
          kl: 0.010529205314605229
          policy_loss: -0.017454037401411267
          total_loss: 1.6853268649843005
          vf_explained_var: 0.30108708143234253
          vf_loss: 1.7048373500506082
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 975000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,975,26751.3,975000,-25.341,-19.2,-41.6,253.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-10-22_03-12-06
  done: false
  episode_len_mean: 253.09
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.309000000000093
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3194
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03983753122741563
          cur_lr: 5.000000000000001e-05
          entropy: 0.1737757906317711
          entropy_coeff: 0.009999999999999998
          kl: 0.002671036007848847
          policy_loss: -0.03435369117392434
          total_loss: 1.3251387198766074
          vf_explained_var: 0.19267480075359344
          vf_loss: 1.361123756567637
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 976000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,976,26784.1,976000,-25.309,-19.2,-41.6,253.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-10-22_03-12-38
  done: false
  episode_len_mean: 252.42
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.242000000000086
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3199
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.23883135666449865
          entropy_coeff: 0.009999999999999998
          kl: 0.0032861290656433714
          policy_loss: 0.015694617852568628
          total_loss: 1.4741943425602384
          vf_explained_var: 0.2092992216348648
          vf_loss: 1.4608225888676114
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 977000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,977,26815.8,977000,-25.242,-19.2,-41.6,252.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-10-22_03-13-10
  done: false
  episode_len_mean: 250.47
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.04700000000009
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3204
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009959382806853908
          cur_lr: 5.000000000000001e-05
          entropy: 0.22967353860537212
          entropy_coeff: 0.009999999999999998
          kl: 0.0029749610935911144
          policy_loss: -0.009277865787347158
          total_loss: 1.629038546482722
          vf_explained_var: 0.22780975699424744
          vf_loss: 1.640583531724082
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained: 978000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,978,26848.3,978000,-25.047,-19.2,-41.6,250.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-10-22_03-13-42
  done: false
  episode_len_mean: 247.7
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -24.77000000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3208
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004979691403426954
          cur_lr: 5.000000000000001e-05
          entropy: 0.28476296017567315
          entropy_coeff: 0.009999999999999998
          kl: 0.0048756919411262305
          policy_loss: 0.051391727560096315
          total_loss: 0.9489693363507589
          vf_explained_var: 0.30924782156944275
          vf_loss: 0.9004009690549638
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 979000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,979,26880.1,979000,-24.77,-19.2,-41.6,247.7




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-10-22_03-14-33
  done: false
  episode_len_mean: 245.22
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.52200000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3213
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002489845701713477
          cur_lr: 5.000000000000001e-05
          entropy: 0.4169376868340704
          entropy_coeff: 0.009999999999999998
          kl: 0.06279080509222285
          policy_loss: 0.011964341418610679
          total_loss: 1.616921126180225
          vf_explained_var: 0.3553297221660614
          vf_loss: 1.6089698480235206
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 980000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,980,26931.4,980000,-24.522,-18.8,-41.6,245.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-10-22_03-15-06
  done: false
  episode_len_mean: 245.21
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.521000000000083
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3218
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003734768552570216
          cur_lr: 5.000000000000001e-05
          entropy: 0.198316619793574
          entropy_coeff: 0.009999999999999998
          kl: 0.008716314123638982
          policy_loss: 0.05500222047170003
          total_loss: 1.0475715114010704
          vf_explained_var: 0.26611068844795227
          vf_loss: 0.9945198992888132
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,981,26964.1,981000,-24.521,-18.8,-41.6,245.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-10-22_03-15-38
  done: false
  episode_len_mean: 244.91
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.491000000000078
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3222
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003734768552570216
          cur_lr: 5.000000000000001e-05
          entropy: 0.4225558323992623
          entropy_coeff: 0.009999999999999998
          kl: 0.04911499413192691
          policy_loss: 0.032885166257619856
          total_loss: 0.8124670247236888
          vf_explained_var: 0.31474021077156067
          vf_loss: 0.7836239920722113
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 982000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,982,26995.9,982000,-24.491,-18.8,-41.6,244.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-10-22_03-16-10
  done: false
  episode_len_mean: 244.22
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.42200000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3227
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005602152828855324
          cur_lr: 5.000000000000001e-05
          entropy: 0.362822247048219
          entropy_coeff: 0.009999999999999998
          kl: 0.010211585965659726
          policy_loss: -0.0024298669563399423
          total_loss: 0.9111246056026883
          vf_explained_var: 0.3141416311264038
          vf_loss: 0.9171254886521234
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained: 983000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,983,27028.1,983000,-24.422,-18.8,-41.6,244.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-10-22_03-16-40
  done: false
  episode_len_mean: 243.75
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.37500000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3231
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005602152828855324
          cur_lr: 5.000000000000001e-05
          entropy: 0.39156119724114735
          entropy_coeff: 0.009999999999999998
          kl: 0.008365103315468206
          policy_loss: 0.004217754718330171
          total_loss: 0.9587231662538317
          vf_explained_var: 0.2209596037864685
          vf_loss: 0.9583741552299924
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 984000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,984,27057.9,984000,-24.375,-18.8,-41.6,243.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-10-22_03-17-12
  done: false
  episode_len_mean: 243.56
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.35600000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3235
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005602152828855324
          cur_lr: 5.000000000000001e-05
          entropy: 0.29736413442426257
          entropy_coeff: 0.009999999999999998
          kl: 0.0028026145315438256
          policy_loss: 0.03031918994254536
          total_loss: 0.8619420972135332
          vf_explained_var: 0.18430818617343903
          vf_loss: 0.8345808558993869
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,985,27089.9,985000,-24.356,-18.8,-41.6,243.56




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-10-22_03-18-00
  done: false
  episode_len_mean: 243.39
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.339000000000077
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3240
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002801076414427662
          cur_lr: 5.000000000000001e-05
          entropy: 0.35091817809475795
          entropy_coeff: 0.009999999999999998
          kl: 0.016690767373302152
          policy_loss: 0.008646348946624333
          total_loss: 1.2305206901497312
          vf_explained_var: 0.24022606015205383
          vf_loss: 1.225336785448922
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_trained: 986000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,986,27137.4,986000,-24.339,-18.8,-41.6,243.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-10-22_03-18-32
  done: false
  episode_len_mean: 243.99
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.399000000000083
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3244
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002801076414427662
          cur_lr: 5.000000000000001e-05
          entropy: 0.5047870255178876
          entropy_coeff: 0.009999999999999998
          kl: 0.007170726646287543
          policy_loss: 0.033816218707296584
          total_loss: 1.0356531063715617
          vf_explained_var: 0.2231595367193222
          vf_loss: 1.006864666276508
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 987000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,987,27169.9,987000,-24.399,-18.8,-41.6,243.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-10-22_03-19-05
  done: false
  episode_len_mean: 243.9
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.39000000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 3248
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002801076414427662
          cur_lr: 5.000000000000001e-05
          entropy: 0.5743301894929674
          entropy_coeff: 0.009999999999999998
          kl: 0.027290088531212734
          policy_loss: -0.01374442262781991
          total_loss: 0.7662117640177409
          vf_explained_var: 0.5156277418136597
          vf_loss: 0.7856230474180645
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 988000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,988,27202.2,988000,-24.39,-18.8,-41.6,243.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-10-22_03-19-37
  done: false
  episode_len_mean: 241.34
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.13400000000008
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 5
  episodes_total: 3253
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004201614621641494
          cur_lr: 5.000000000000001e-05
          entropy: 0.2475827693939209
          entropy_coeff: 0.009999999999999998
          kl: 0.0044436794985974375
          policy_loss: -0.04363740732272466
          total_loss: 0.8191566619608137
          vf_explained_var: 0.6097106337547302
          vf_loss: 0.865251221259435
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained: 989000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,989,27234.4,989000,-24.134,-18.8,-41.6,241.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-10-22_03-20-10
  done: false
  episode_len_mean: 235.89
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.58900000000006
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 3257
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002100807310820747
          cur_lr: 5.000000000000001e-05
          entropy: 0.1695770281056563
          entropy_coeff: 0.009999999999999998
          kl: 0.002877988918375745
          policy_loss: 0.02397839358697335
          total_loss: 0.7090565217865838
          vf_explained_var: 0.42804744839668274
          vf_loss: 0.6867678463459015
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,990,27267.5,990000,-23.589,-18.8,-33.1,235.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-10-22_03-20-40
  done: false
  episode_len_mean: 234.36
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.436000000000067
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 5
  episodes_total: 3262
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010504036554103734
          cur_lr: 5.000000000000001e-05
          entropy: 0.22497311764293246
          entropy_coeff: 0.009999999999999998
          kl: 0.008162293016832498
          policy_loss: 0.015642584446403714
          total_loss: 0.934560536675983
          vf_explained_var: 0.3874360918998718
          vf_loss: 0.92115910715527
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 991000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,991,27297.7,991000,-23.436,-18.8,-33.1,234.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-10-22_03-21-12
  done: false
  episode_len_mean: 231.39
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.139000000000056
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 3266
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010504036554103734
          cur_lr: 5.000000000000001e-05
          entropy: 0.2522368902133571
          entropy_coeff: 0.009999999999999998
          kl: 0.011335858229629459
          policy_loss: 0.03033276539709833
          total_loss: 0.8708288775549995
          vf_explained_var: 0.24166467785835266
          vf_loss: 0.8430065684848361
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 992000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,992,27329.5,992000,-23.139,-18.8,-33.1,231.39




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-10-22_03-21-57
  done: false
  episode_len_mean: 229.69
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.969000000000054
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 3270
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010504036554103734
          cur_lr: 5.000000000000001e-05
          entropy: 0.26011087695757545
          entropy_coeff: 0.009999999999999998
          kl: 0.002782222572998055
          policy_loss: 0.02113649704390102
          total_loss: 1.029346955484814
          vf_explained_var: 0.16940757632255554
          vf_loss: 1.010808653301663
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 993000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,993,27374.8,993000,-22.969,-18.8,-33.1,229.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-10-22_03-22-29
  done: false
  episode_len_mean: 226.96
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.696000000000044
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 3274
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005252018277051867
          cur_lr: 5.000000000000001e-05
          entropy: 0.34561706599262026
          entropy_coeff: 0.009999999999999998
          kl: 0.029278766321544355
          policy_loss: -0.0053968269791868
          total_loss: 1.0742718345589108
          vf_explained_var: 0.18206575512886047
          vf_loss: 1.0831094490157234
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 994000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,994,27406.3,994000,-22.696,-18.8,-32.5,226.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-10-22_03-23-00
  done: false
  episode_len_mean: 224.95
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.495000000000047
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3279
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007878027415577799
          cur_lr: 5.000000000000001e-05
          entropy: 0.2856422594851918
          entropy_coeff: 0.009999999999999998
          kl: 0.01230872068574224
          policy_loss: -0.011052203840679592
          total_loss: 1.1254458553261226
          vf_explained_var: 0.2925559878349304
          vf_loss: 1.1393447875976563
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 995000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,995,27437.3,995000,-22.495,-18.8,-31.5,224.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-10-22_03-23-32
  done: false
  episode_len_mean: 225.03
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.503000000000053
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3283
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007878027415577799
          cur_lr: 5.000000000000001e-05
          entropy: 0.30190857748190564
          entropy_coeff: 0.009999999999999998
          kl: 0.004145696131297427
          policy_loss: 0.019471913162204955
          total_loss: 0.9835708340009054
          vf_explained_var: 0.23608773946762085
          vf_loss: 0.9671147372987535
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 996000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,996,27469.7,996000,-22.503,-18.8,-31.5,225.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-10-22_03-24-04
  done: false
  episode_len_mean: 224.87
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.487000000000045
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3288
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00039390137077888995
          cur_lr: 5.000000000000001e-05
          entropy: 0.19057820671134526
          entropy_coeff: 0.009999999999999998
          kl: 0.0009206465494118435
          policy_loss: -0.02448599131570922
          total_loss: 1.1852591991424561
          vf_explained_var: 0.24413426220417023
          vf_loss: 1.2116506192419263
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 9970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,997,27501.3,997000,-22.487,-18.8,-31.5,224.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-10-22_03-24-36
  done: false
  episode_len_mean: 225.07
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.507000000000048
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3292
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019695068538944498
          cur_lr: 5.000000000000001e-05
          entropy: 0.31041937089628646
          entropy_coeff: 0.009999999999999998
          kl: 0.025964717281273447
          policy_loss: 0.05608920305967331
          total_loss: 0.8773111681143443
          vf_explained_var: 0.31363624334335327
          vf_loss: 0.8243210504452387
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 998000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,998,27533,998000,-22.507,-18.8,-31.5,225.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-10-22_03-25-07
  done: false
  episode_len_mean: 225.41
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.541000000000054
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3297
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002954260280841675
          cur_lr: 5.000000000000001e-05
          entropy: 0.14988733861181472
          entropy_coeff: 0.009999999999999998
          kl: 0.0015468163204966049
          policy_loss: -0.039223656099703574
          total_loss: 1.0663040823406644
          vf_explained_var: 0.34763509035110474
          vf_loss: 1.1070261769824559
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 9990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,999,27564.8,999000,-22.541,-18.8,-31.5,225.41




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-10-22_03-25-58
  done: false
  episode_len_mean: 225.32
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.532000000000053
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3301
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00014771301404208374
          cur_lr: 5.000000000000001e-05
          entropy: 0.2961037298043569
          entropy_coeff: 0.009999999999999998
          kl: 0.028966252634259525
          policy_loss: 0.03281769533124235
          total_loss: 0.9405314942200979
          vf_explained_var: 0.2991364300251007
          vf_loss: 0.9106705645720164
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_trained: 1000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1000,27615.4,1000000,-22.532,-18.8,-31.5,225.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-10-22_03-26-29
  done: false
  episode_len_mean: 225.57
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.557000000000052
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3306
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00022156952106312553
          cur_lr: 5.000000000000001e-05
          entropy: 0.19594131567411952
          entropy_coeff: 0.009999999999999998
          kl: 0.003158402460983388
          policy_loss: -0.012289466791682774
          total_loss: 1.1100028779771594
          vf_explained_var: 0.34644004702568054
          vf_loss: 1.1242510636647542
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1001,27646.7,1001000,-22.557,-18.8,-31.5,225.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-10-22_03-27-02
  done: false
  episode_len_mean: 226.04
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.604000000000052
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3310
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011078476053156276
          cur_lr: 5.000000000000001e-05
          entropy: 0.21493744452794392
          entropy_coeff: 0.009999999999999998
          kl: 0.005164452772542442
          policy_loss: 0.04004441665278541
          total_loss: 0.9149163835578494
          vf_explained_var: 0.28026244044303894
          vf_loss: 0.8770207762718201
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1002,27679.1,1002000,-22.604,-19.4,-31.5,226.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-10-22_03-27-32
  done: false
  episode_len_mean: 226.33
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.633000000000052
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3315
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011078476053156276
          cur_lr: 5.000000000000001e-05
          entropy: 0.14741643733448453
          entropy_coeff: 0.009999999999999998
          kl: 0.002426634956423691
          policy_loss: 0.010700879597829447
          total_loss: 1.0797883258925545
          vf_explained_var: 0.36199209094047546
          vf_loss: 1.0705613434314727
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1003,27709.6,1003000,-22.633,-19.4,-31.5,226.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-10-22_03-28-04
  done: false
  episode_len_mean: 226.55
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.655000000000058
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3319
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.539238026578138e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.1713427082531982
          entropy_coeff: 0.009999999999999998
          kl: 0.0024247280391633077
          policy_loss: 0.016177835563818615
          total_loss: 0.9930726620886061
          vf_explained_var: 0.2720615565776825
          vf_loss: 0.9786081161763933
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_trained: 100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1004,27741.7,1004000,-22.655,-19.4,-31.5,226.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-10-22_03-28-36
  done: false
  episode_len_mean: 226.59
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.659000000000052
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3324
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.769619013289069e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.11877233187357585
          entropy_coeff: 0.009999999999999998
          kl: 0.0028972533164896034
          policy_loss: 0.007606806771622764
          total_loss: 1.1462747083769904
          vf_explained_var: 0.3191376030445099
          vf_loss: 1.1398555510573918
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1005,27773,1005000,-22.659,-19.4,-31.5,226.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-10-22_03-29-07
  done: false
  episode_len_mean: 226.22
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.622000000000053
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3328
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3848095066445345e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.18597856817973984
          entropy_coeff: 0.009999999999999998
          kl: 0.00853559106395794
          policy_loss: 0.02791982011662589
          total_loss: 0.8052567879358927
          vf_explained_var: 0.38499680161476135
          vf_loss: 0.779196650452084
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_trained: 1006

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1006,27804.2,1006000,-22.622,-19.4,-31.5,226.22




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-10-22_03-29-58
  done: false
  episode_len_mean: 225.95
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.59500000000005
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3333
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3848095066445345e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.13775347446401914
          entropy_coeff: 0.009999999999999998
          kl: 0.0048829511185271455
          policy_loss: -0.03313448280096054
          total_loss: 1.2126644763681624
          vf_explained_var: 0.28482672572135925
          vf_loss: 1.247176429298189
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1007,27854.8,1007000,-22.595,-19.4,-31.5,225.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-10-22_03-30-28
  done: false
  episode_len_mean: 226.05
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.60500000000005
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3337
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.924047533222673e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.31461307638221314
          entropy_coeff: 0.009999999999999998
          kl: 0.04282148696795976
          policy_loss: 0.04317110545105404
          total_loss: 0.860630597670873
          vf_explained_var: 0.259066104888916
          vf_loss: 0.820605324043168
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_trained: 1008000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1008,27885.7,1008000,-22.605,-19.4,-31.5,226.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-10-22_03-30-58
  done: false
  episode_len_mean: 227.2
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.72000000000005
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3341
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0386071299834015e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.21147192137108908
          entropy_coeff: 0.009999999999999998
          kl: 0.00856847566683759
          policy_loss: 0.036232749289936486
          total_loss: 1.0733126315805648
          vf_explained_var: 0.1417202353477478
          vf_loss: 1.039194525612725
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_trained: 100900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1009,27915.2,1009000,-22.72,-19.4,-31.5,227.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-10-22_03-31-30
  done: false
  episode_len_mean: 226.61
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.661000000000055
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 5
  episodes_total: 3346
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0386071299834015e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.22717881740795243
          entropy_coeff: 0.009999999999999998
          kl: 0.0230468355645397
          policy_loss: -0.01780530396434996
          total_loss: 1.370067725578944
          vf_explained_var: 0.1834356039762497
          vf_loss: 1.3901445978217655
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_trained: 10100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1010,27947.1,1010000,-22.661,-19.4,-31.5,226.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-10-22_03-32-01
  done: false
  episode_len_mean: 226.33
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.63300000000005
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3350
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5579106949751024e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.28146682547198404
          entropy_coeff: 0.009999999999999998
          kl: 0.025142501051291472
          policy_loss: 0.04174002442095015
          total_loss: 0.9734593331813812
          vf_explained_var: 0.23725982010364532
          vf_loss: 0.9345335774951511
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1011,27978.1,1011000,-22.633,-19.4,-31.5,226.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-10-22_03-32-28
  done: false
  episode_len_mean: 228.13
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.813000000000056
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3354
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.336866042462653e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.404826737774743
          entropy_coeff: 0.009999999999999998
          kl: 0.009715236206849593
          policy_loss: 0.03299308725529247
          total_loss: 1.2761726909213595
          vf_explained_var: 0.07318798452615738
          vf_loss: 1.247227660814921
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_trained: 101200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1012,28004.7,1012000,-22.813,-19.4,-31.5,228.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-10-22_03-32-57
  done: false
  episode_len_mean: 228.99
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -22.899000000000054
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 3358
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.336866042462653e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3719404059979651
          entropy_coeff: 0.009999999999999998
          kl: 0.027959307999535374
          policy_loss: 0.02874743135439025
          total_loss: 1.1820789959695603
          vf_explained_var: 0.15387199819087982
          vf_loss: 1.1570503049426608
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_trained: 1013

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1013,28033.9,1013000,-22.899,-19.4,-31.5,228.99




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-10-22_03-33-37
  done: false
  episode_len_mean: 231.21
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.121000000000063
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3361
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5052990636939805e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.37251120971308815
          entropy_coeff: 0.009999999999999998
          kl: 0.022429952027867885
          policy_loss: -0.003343990527921253
          total_loss: 0.6614313112364875
          vf_explained_var: 0.16800084710121155
          vf_loss: 0.6684996328523589
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1014,28074.4,1014000,-23.121,-19.4,-43.2,231.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-10-22_03-34-03
  done: false
  episode_len_mean: 234.23
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.423000000000055
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3364
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2579485955409694e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5343726423051622
          entropy_coeff: 0.009999999999999998
          kl: 0.24110926357013718
          policy_loss: -0.03052479616469807
          total_loss: 0.6987072311341762
          vf_explained_var: -0.07716137915849686
          vf_loss: 0.7345630596909258
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1015,28099.8,1015000,-23.423,-19.4,-43.2,234.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-10-22_03-34-32
  done: false
  episode_len_mean: 233.58
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.358000000000064
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 5
  episodes_total: 3369
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.886922893311451e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3565402607123057
          entropy_coeff: 0.009999999999999998
          kl: 0.1959882565115655
          policy_loss: -0.041655404741565386
          total_loss: 0.8358918204903603
          vf_explained_var: 0.6796792149543762
          vf_loss: 0.8810971650812361
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trained: 10160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1016,28129.3,1016000,-23.358,-19.4,-43.2,233.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-10-22_03-35-04
  done: false
  episode_len_mean: 233.8
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.38000000000006
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3373
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001183038433996718
          cur_lr: 5.000000000000001e-05
          entropy: 0.485237206849787
          entropy_coeff: 0.009999999999999998
          kl: 0.09688960462779579
          policy_loss: 0.0008872804128461414
          total_loss: 0.5357875612046984
          vf_explained_var: 0.6858328580856323
          vf_loss: 0.5397411929236517
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_trained: 1017000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1017,28160.8,1017000,-23.38,-19.4,-43.2,233.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-10-22_03-35-36
  done: false
  episode_len_mean: 233.71
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.37100000000006
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3377
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00017745576509950768
          cur_lr: 5.000000000000001e-05
          entropy: 0.6730357077386644
          entropy_coeff: 0.009999999999999998
          kl: 0.09940338351152257
          policy_loss: 0.014258905831310485
          total_loss: 0.5285482191377215
          vf_explained_var: 0.6428329348564148
          vf_loss: 0.5210020313660304
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_trained: 10180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1018,28192.8,1018000,-23.371,-19.4,-43.2,233.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-10-22_03-35-59
  done: false
  episode_len_mean: 236.22
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.62200000000006
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3380
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00026618364764926155
          cur_lr: 5.000000000000001e-05
          entropy: 0.8828982869784038
          entropy_coeff: 0.009999999999999998
          kl: 0.19918474536778047
          policy_loss: 0.0029179548223813375
          total_loss: 0.5698787808418274
          vf_explained_var: 0.5647047758102417
          vf_loss: 0.5757367899020512
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_trained: 1019

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1019,28216.1,1019000,-23.622,-19.4,-43.2,236.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-10-22_03-36-20
  done: false
  episode_len_mean: 239.99
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -23.99900000000007
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3383
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00039927547147389225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7049652516841889
          entropy_coeff: 0.009999999999999998
          kl: 0.04651635592814689
          policy_loss: 0.09914429204331504
          total_loss: 0.6148485312859218
          vf_explained_var: 0.4404296576976776
          vf_loss: 0.5227353174653319
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_trained: 102000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1020,28236.8,1020000,-23.999,-19.4,-43.2,239.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-10-22_03-36-45
  done: false
  episode_len_mean: 242.61
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.26100000000007
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3386
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005989132072108384
          cur_lr: 5.000000000000001e-05
          entropy: 0.9190269258287218
          entropy_coeff: 0.009999999999999998
          kl: 0.017498179428580823
          policy_loss: -0.05086681428882811
          total_loss: 0.5272952298323313
          vf_explained_var: 0.6005541682243347
          vf_loss: 0.5873418364259931
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trained: 10210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1021,28261.9,1021000,-24.261,-19.4,-43.2,242.61




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-10-22_03-37-24
  done: false
  episode_len_mean: 245.64
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.564000000000075
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3390
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005989132072108384
          cur_lr: 5.000000000000001e-05
          entropy: 0.9472905152373844
          entropy_coeff: 0.009999999999999998
          kl: 0.01716440901589517
          policy_loss: -0.0014099367790751988
          total_loss: 0.5800361464420954
          vf_explained_var: 0.5350775122642517
          vf_loss: 0.5909087051947911
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_trained: 102

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1022,28301.1,1022000,-24.564,-19.4,-43.2,245.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-10-22_03-37-50
  done: false
  episode_len_mean: 247.85
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.78500000000008
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3393
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005989132072108384
          cur_lr: 5.000000000000001e-05
          entropy: 1.047354198826684
          entropy_coeff: 0.009999999999999998
          kl: 0.029010830667514594
          policy_loss: 0.05857364022069507
          total_loss: 0.5562852501869202
          vf_explained_var: 0.5453206300735474
          vf_loss: 0.5081677796526087
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained: 1023000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1023,28327.2,1023000,-24.785,-19.4,-43.2,247.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-10-22_03-38-13
  done: false
  episode_len_mean: 250.7
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -25.070000000000086
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3396
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008983698108162575
          cur_lr: 5.000000000000001e-05
          entropy: 1.0753541860315534
          entropy_coeff: 0.009999999999999998
          kl: 0.0168125529679508
          policy_loss: 0.06421440301669969
          total_loss: 0.6729889283577601
          vf_explained_var: 0.4457472860813141
          vf_loss: 0.6195129498425457
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained: 1024000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1024,28349.8,1024000,-25.07,-19.4,-43.2,250.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-10-22_03-38-36
  done: false
  episode_len_mean: 253.63
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -25.36300000000009
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3399
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008983698108162575
          cur_lr: 5.000000000000001e-05
          entropy: 0.9681602213117811
          entropy_coeff: 0.009999999999999998
          kl: 0.014874594837772292
          policy_loss: 0.022146377050214345
          total_loss: 0.6689985556734933
          vf_explained_var: 0.237193301320076
          vf_loss: 0.6565204210579395
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_trained: 102500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1025,28372.5,1025000,-25.363,-19.4,-43.2,253.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-10-22_03-39-01
  done: false
  episode_len_mean: 256.7
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -25.670000000000094
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3403
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008983698108162575
          cur_lr: 5.000000000000001e-05
          entropy: 0.806609586874644
          entropy_coeff: 0.009999999999999998
          kl: 0.011954925737794832
          policy_loss: -0.00601580614844958
          total_loss: 0.9333792606989543
          vf_explained_var: 0.36103054881095886
          vf_loss: 0.9474504292011261
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trained: 10260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1026,28398,1026000,-25.67,-19.5,-43.2,256.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-10-22_03-39-27
  done: false
  episode_len_mean: 258.33
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -25.833000000000094
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3406
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008983698108162575
          cur_lr: 5.000000000000001e-05
          entropy: 0.9360463248358832
          entropy_coeff: 0.009999999999999998
          kl: 0.020837795470144412
          policy_loss: 0.026450460818078784
          total_loss: 0.7933266338374879
          vf_explained_var: 0.2543063759803772
          vf_loss: 0.776217919588089
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trained: 10270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1027,28424.4,1027000,-25.833,-19.5,-43.2,258.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-10-22_03-39-53
  done: false
  episode_len_mean: 260.22
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -26.0220000000001
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3410
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013475547162243864
          cur_lr: 5.000000000000001e-05
          entropy: 0.9099913239479065
          entropy_coeff: 0.009999999999999998
          kl: 0.008552023553997243
          policy_loss: 0.01928875520825386
          total_loss: 1.0805469069215987
          vf_explained_var: 0.2173592895269394
          vf_loss: 1.0703465316030714
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_trained: 1028000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1028,28450.2,1028000,-26.022,-19.5,-43.2,260.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-10-22_03-40-18
  done: false
  episode_len_mean: 262.49
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -26.249000000000105
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3413
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013475547162243864
          cur_lr: 5.000000000000001e-05
          entropy: 1.0614394505818685
          entropy_coeff: 0.009999999999999998
          kl: 0.0119749741418882
          policy_loss: -0.06276223883032798
          total_loss: 1.0639110055234697
          vf_explained_var: 0.13429884612560272
          vf_loss: 1.137271507581075
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_trained: 102900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1029,28474.6,1029000,-26.249,-19.5,-43.2,262.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-10-22_03-40-42
  done: false
  episode_len_mean: 265.55
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -26.55500000000011
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3417
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013475547162243864
          cur_lr: 5.000000000000001e-05
          entropy: 0.949347409274843
          entropy_coeff: 0.009999999999999998
          kl: 0.01743161993845652
          policy_loss: -0.006406699948840671
          total_loss: 1.1237191571129692
          vf_explained_var: 0.23802879452705383
          vf_loss: 1.1395958403746287
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_trained: 10300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1030,28499.2,1030000,-26.555,-19.5,-43.2,265.55




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-10-22_03-41-26
  done: false
  episode_len_mean: 266.78
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -26.678000000000107
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3420
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013475547162243864
          cur_lr: 5.000000000000001e-05
          entropy: 0.9733137746651968
          entropy_coeff: 0.009999999999999998
          kl: 0.060609445771758017
          policy_loss: -0.06178984062539206
          total_loss: 0.9091675003369649
          vf_explained_var: 0.3574918508529663
          vf_loss: 0.9806088036961026
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_trained: 1031

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1031,28542.8,1031000,-26.678,-19.5,-43.2,266.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-10-22_03-41-52
  done: false
  episode_len_mean: 268.84
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -26.884000000000114
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3424
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.7965395119455125
          entropy_coeff: 0.009999999999999998
          kl: 0.009631512188234126
          policy_loss: 0.029751692960659663
          total_loss: 0.882144320011139
          vf_explained_var: 0.4555709958076477
          vf_loss: 0.8603385481569502
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_trained: 10320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1032,28569.1,1032000,-26.884,-19.5,-43.2,268.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-10-22_03-42-20
  done: false
  episode_len_mean: 270.21
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -27.021000000000114
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3428
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.7186814480357699
          entropy_coeff: 0.009999999999999998
          kl: 0.006985672393039029
          policy_loss: 0.08302639532420371
          total_loss: 0.8265201985836029
          vf_explained_var: 0.5102534890174866
          vf_loss: 0.7506664911905925
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained: 10330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1033,28596.4,1033000,-27.021,-19.5,-43.2,270.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-10-22_03-42-46
  done: false
  episode_len_mean: 272.17
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -27.217000000000116
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3431
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.7590840273433261
          entropy_coeff: 0.009999999999999998
          kl: 0.012114640174787894
          policy_loss: -0.08204607814550399
          total_loss: 0.7787557257546319
          vf_explained_var: 0.4642021954059601
          vf_loss: 0.8683681620491875
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_trained: 10340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1034,28622.6,1034000,-27.217,-19.8,-43.2,272.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-10-22_03-43-11
  done: false
  episode_len_mean: 274.67
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -27.46700000000012
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3435
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.7741901914278666
          entropy_coeff: 0.009999999999999998
          kl: 0.014258250224308237
          policy_loss: 0.011146207236581378
          total_loss: 1.0795986314614614
          vf_explained_var: 0.34154534339904785
          vf_loss: 1.0761654992898306
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_trained: 10350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1035,28647.7,1035000,-27.467,-19.8,-43.2,274.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-10-22_03-43-36
  done: false
  episode_len_mean: 276.5
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -27.650000000000123
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3438
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.783789743979772
          entropy_coeff: 0.009999999999999998
          kl: 0.009438056669929454
          policy_loss: -0.08070991809169452
          total_loss: 1.1551461425092486
          vf_explained_var: 0.16931457817554474
          vf_loss: 1.2436748888757494
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trained: 103600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1036,28672.6,1036000,-27.65,-19.8,-43.2,276.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-10-22_03-44-01
  done: false
  episode_len_mean: 278.08
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -27.808000000000128
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3442
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020213320743365797
          cur_lr: 5.000000000000001e-05
          entropy: 0.644065633085039
          entropy_coeff: 0.009999999999999998
          kl: 0.00442633304342558
          policy_loss: 0.038097265279955335
          total_loss: 1.1903796937730577
          vf_explained_var: 0.2665708363056183
          vf_loss: 1.158714132838779
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trained: 1037000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1037,28697.8,1037000,-27.808,-19.8,-43.2,278.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-10-22_03-44-27
  done: false
  episode_len_mean: 279.72
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -27.972000000000126
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3445
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010106660371682898
          cur_lr: 5.000000000000001e-05
          entropy: 0.7293024798234303
          entropy_coeff: 0.009999999999999998
          kl: 0.0059941833045295805
          policy_loss: -0.07160576689574454
          total_loss: 1.1385190533267127
          vf_explained_var: 0.2631196975708008
          vf_loss: 1.2174117843310037
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_trained: 1038

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1038,28723.8,1038000,-27.972,-19.8,-43.2,279.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-10-22_03-44-52
  done: false
  episode_len_mean: 282.02
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -28.202000000000126
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3449
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010106660371682898
          cur_lr: 5.000000000000001e-05
          entropy: 0.725166431400511
          entropy_coeff: 0.009999999999999998
          kl: 0.01398193789621279
          policy_loss: -9.491985870732201e-05
          total_loss: 1.1350180427233378
          vf_explained_var: 0.3427269756793976
          vf_loss: 1.1423504922125074
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_trained: 10390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1039,28748.7,1039000,-28.202,-19.8,-43.2,282.02




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-10-22_03-45-36
  done: false
  episode_len_mean: 283.22
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -28.322000000000134
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3453
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010106660371682898
          cur_lr: 5.000000000000001e-05
          entropy: 0.7912001768747966
          entropy_coeff: 0.009999999999999998
          kl: 0.0294756165982335
          policy_loss: -0.008918413188722399
          total_loss: 1.2101524935828314
          vf_explained_var: 0.2818225026130676
          vf_loss: 1.2269531144036188
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_trained: 104000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1040,28792.2,1040000,-28.322,-19.8,-43.2,283.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-10-22_03-46-02
  done: false
  episode_len_mean: 283.56
  episode_media: {}
  episode_reward_max: -19.80000000000001
  episode_reward_mean: -28.35600000000013
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 3456
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001515999055752435
          cur_lr: 5.000000000000001e-05
          entropy: 0.6700290424956216
          entropy_coeff: 0.009999999999999998
          kl: 0.02758083508924969
          policy_loss: 0.03777671754360199
          total_loss: 0.903848515285386
          vf_explained_var: 0.409606009721756
          vf_loss: 0.8727302710215251
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained: 1041000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1041,28819,1041000,-28.356,-19.8,-43.2,283.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-10-22_03-46-30
  done: false
  episode_len_mean: 284.62
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -28.46200000000014
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 4
  episodes_total: 3460
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022739985836286516
          cur_lr: 5.000000000000001e-05
          entropy: 0.4982625785801146
          entropy_coeff: 0.009999999999999998
          kl: 0.008065908478698387
          policy_loss: 0.01895996928215027
          total_loss: 1.0855430768595802
          vf_explained_var: 0.3094317317008972
          vf_loss: 1.0715473764472538
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trained: 1042000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1042,28846.4,1042000,-28.462,-22.5,-43.2,284.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-10-22_03-46-57
  done: false
  episode_len_mean: 280.78
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -28.078000000000124
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 3464
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022739985836286516
          cur_lr: 5.000000000000001e-05
          entropy: 0.5795333209964965
          entropy_coeff: 0.009999999999999998
          kl: 0.022382136312838145
          policy_loss: 0.04884369108412001
          total_loss: 0.9479467683368259
          vf_explained_var: 0.333936482667923
          vf_loss: 0.9048475152916379
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_trained: 1043000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1043,28874,1043000,-28.078,-22.6,-42.3,280.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-10-22_03-47-26
  done: false
  episode_len_mean: 281.33
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.13300000000013
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 3468
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034109978754429783
          cur_lr: 5.000000000000001e-05
          entropy: 0.5351100163327323
          entropy_coeff: 0.009999999999999998
          kl: 0.041813523044246244
          policy_loss: 0.009070833689636654
          total_loss: 0.9449056976371342
          vf_explained_var: 0.45509782433509827
          vf_loss: 0.9410433325502607
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_trained: 10440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1044,28902.3,1044000,-28.133,-22.8,-42.3,281.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-10-22_03-47-53
  done: false
  episode_len_mean: 282.26
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -28.226000000000134
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 3472
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005116496813164466
          cur_lr: 5.000000000000001e-05
          entropy: 0.47018247014946407
          entropy_coeff: 0.009999999999999998
          kl: 0.01433742856610978
          policy_loss: 0.03720629202822844
          total_loss: 0.9596200313833024
          vf_explained_var: 0.48419031500816345
          vf_loss: 0.9270421902338664
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_trained: 104500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1045,28929.3,1045000,-28.226,-22.9,-42.3,282.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-10-22_03-48-21
  done: false
  episode_len_mean: 283.1
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -28.310000000000137
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 3476
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005116496813164466
          cur_lr: 5.000000000000001e-05
          entropy: 0.5682525320185555
          entropy_coeff: 0.009999999999999998
          kl: 0.05345859441486627
          policy_loss: 0.0346707198354933
          total_loss: 0.7471803579065535
          vf_explained_var: 0.5905519723892212
          vf_loss: 0.717918634083536
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_trained: 1046000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1046,28957.3,1046000,-28.31,-22.9,-42.3,283.1




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-10-22_03-49-06
  done: false
  episode_len_mean: 281.04
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.104000000000127
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 3480
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007674745219746701
          cur_lr: 5.000000000000001e-05
          entropy: 0.5202301392952601
          entropy_coeff: 0.009999999999999998
          kl: 0.05429029978550517
          policy_loss: 0.04449144792225626
          total_loss: 0.6581546783447265
          vf_explained_var: 0.6808134913444519
          vf_loss: 0.6184488657448027
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_trained: 1047000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1047,29002.9,1047000,-28.104,-21.9,-42.3,281.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-10-22_03-49-37
  done: false
  episode_len_mean: 277.13
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.71300000000012
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 3484
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011512117829620052
          cur_lr: 5.000000000000001e-05
          entropy: 0.3981425103214052
          entropy_coeff: 0.009999999999999998
          kl: 0.01746038349698406
          policy_loss: -0.0348713015516599
          total_loss: 0.4566930681467056
          vf_explained_var: 0.770221471786499
          vf_loss: 0.4953447880016433
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps_trained: 1048000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1048,29033.6,1048000,-27.713,-21.9,-35.5,277.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-10-22_03-50-07
  done: false
  episode_len_mean: 274.61
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.461000000000116
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 3488
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011512117829620052
          cur_lr: 5.000000000000001e-05
          entropy: 0.44559656348493365
          entropy_coeff: 0.009999999999999998
          kl: 0.026761695044067658
          policy_loss: 0.03648037314414978
          total_loss: 0.8307023194101122
          vf_explained_var: 0.47384440898895264
          vf_loss: 0.7983698298533758
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_trained: 10490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1049,29063.1,1049000,-27.461,-21.9,-35.5,274.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-10-22_03-50-36
  done: false
  episode_len_mean: 272.42
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.24200000000012
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 3492
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017268176744430073
          cur_lr: 5.000000000000001e-05
          entropy: 0.50373944275909
          entropy_coeff: 0.009999999999999998
          kl: 0.08479079959004454
          policy_loss: -0.008585785577694575
          total_loss: 0.6758990857336257
          vf_explained_var: 0.5021922588348389
          vf_loss: 0.6880580749776628
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1050,29092.3,1050000,-27.242,-21.9,-35.5,272.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-10-22_03-51-04
  done: false
  episode_len_mean: 270.25
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.025000000000112
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 3496
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02590226511664511
          cur_lr: 5.000000000000001e-05
          entropy: 0.3295604808462991
          entropy_coeff: 0.009999999999999998
          kl: 0.012514592687474627
          policy_loss: 0.04589036765197913
          total_loss: 0.8588294161690606
          vf_explained_var: 0.3956524431705475
          vf_loss: 0.8159104916784499
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_trained: 1051000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1051,29120.3,1051000,-27.025,-21.9,-35.5,270.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-10-22_03-51-30
  done: false
  episode_len_mean: 268.13
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.813000000000105
  episode_reward_min: -33.400000000000205
  episodes_this_iter: 4
  episodes_total: 3500
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02590226511664511
          cur_lr: 5.000000000000001e-05
          entropy: 0.47994343042373655
          entropy_coeff: 0.009999999999999998
          kl: 0.030679213099947193
          policy_loss: 0.018475753400060864
          total_loss: 1.0265106876691183
          vf_explained_var: 0.16373063623905182
          vf_loss: 1.0120397097534604
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_trained: 10520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1052,29146.5,1052000,-26.813,-21.9,-33.4,268.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-10-22_03-52-00
  done: false
  episode_len_mean: 266.42
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.642000000000106
  episode_reward_min: -33.400000000000205
  episodes_this_iter: 4
  episodes_total: 3504
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03885339767496766
          cur_lr: 5.000000000000001e-05
          entropy: 0.2722658089465565
          entropy_coeff: 0.009999999999999998
          kl: 0.004894287426006925
          policy_loss: 0.035840135688583054
          total_loss: 0.8196584575706058
          vf_explained_var: 0.3748769164085388
          vf_loss: 0.7863508197996352
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_trained: 1053000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1053,29176.2,1053000,-26.642,-21.9,-33.4,266.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-10-22_03-52-29
  done: false
  episode_len_mean: 265.02
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.50200000000011
  episode_reward_min: -33.400000000000205
  episodes_this_iter: 4
  episodes_total: 3508
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.3661467628346549
          entropy_coeff: 0.009999999999999998
          kl: 0.006837570293351665
          policy_loss: 0.025861008713642757
          total_loss: 0.8912655062145657
          vf_explained_var: 0.4703746438026428
          vf_loss: 0.8689331399069892
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_trained: 1054000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1054,29205.8,1054000,-26.502,-21.9,-33.4,265.02




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-10-22_03-53-18
  done: false
  episode_len_mean: 262.89
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -26.289000000000105
  episode_reward_min: -33.400000000000205
  episodes_this_iter: 4
  episodes_total: 3512
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.273601582315233
          entropy_coeff: 0.009999999999999998
          kl: 0.012448170864553434
          policy_loss: -0.1077093100382222
          total_loss: 1.1827714655134414
          vf_explained_var: 0.35362085700035095
          vf_loss: 1.292974967426724
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_trained: 1055000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1055,29254.4,1055000,-26.289,-21.3,-33.4,262.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-10-22_03-53-47
  done: false
  episode_len_mean: 259.84
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.984000000000105
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 5
  episodes_total: 3517
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.28166964865393107
          entropy_coeff: 0.009999999999999998
          kl: 0.006395751897310713
          policy_loss: -0.026450408125917115
          total_loss: 0.9961040377616882
          vf_explained_var: 0.4395928382873535
          vf_loss: 1.0252468824386596
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_trained: 1056

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1056,29283.7,1056000,-25.984,-21.3,-32.8,259.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-10-22_03-54-16
  done: false
  episode_len_mean: 258.4
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.8400000000001
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3521
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.24804206689198813
          entropy_coeff: 0.009999999999999998
          kl: 0.005438801576961715
          policy_loss: 0.03631665665242407
          total_loss: 0.9006025797790951
          vf_explained_var: 0.5172871351242065
          vf_loss: 0.8666606909698911
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trained: 1057000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1057,29312.5,1057000,-25.84,-21.3,-32.8,258.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-10-22_03-54-46
  done: false
  episode_len_mean: 257.31
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.731000000000094
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3525
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.25306168463495043
          entropy_coeff: 0.009999999999999998
          kl: 0.013999513919876035
          policy_loss: 0.048238057146469754
          total_loss: 0.8088551292816798
          vf_explained_var: 0.5799577236175537
          vf_loss: 0.7628757291369967
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 10580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1058,29342.7,1058000,-25.731,-21.3,-32.8,257.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-10-22_03-55-16
  done: false
  episode_len_mean: 256.66
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.666000000000096
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3529
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01942669883748383
          cur_lr: 5.000000000000001e-05
          entropy: 0.184436624414391
          entropy_coeff: 0.009999999999999998
          kl: 0.0038328487508066033
          policy_loss: 0.04607964911394649
          total_loss: 0.7448502169715033
          vf_explained_var: 0.638031542301178
          vf_loss: 0.7005404796865251
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_trained: 1059000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1059,29372.2,1059000,-25.666,-21.3,-32.8,256.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-10-22_03-55-46
  done: false
  episode_len_mean: 254.63
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.463000000000093
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3533
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009713349418741916
          cur_lr: 5.000000000000001e-05
          entropy: 0.2803783353832033
          entropy_coeff: 0.009999999999999998
          kl: 0.023734458922723464
          policy_loss: 0.0070087443623277875
          total_loss: 0.8516380336549547
          vf_explained_var: 0.5323289036750793
          vf_loss: 0.8472025281853146
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_trained: 1060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1060,29402,1060000,-25.463,-21.3,-32.8,254.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-10-22_03-56-12
  done: false
  episode_len_mean: 253.8
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.38000000000009
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3537
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01457002412811287
          cur_lr: 5.000000000000001e-05
          entropy: 0.19310818115870157
          entropy_coeff: 0.009999999999999998
          kl: 0.032932883456153625
          policy_loss: 0.02641522805723879
          total_loss: 0.9844290190272861
          vf_explained_var: 0.4951961040496826
          vf_loss: 0.9594650467236837
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_trained: 1061000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1061,29428.3,1061000,-25.38,-21.3,-32.8,253.8




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-10-22_03-56-57
  done: false
  episode_len_mean: 252.27
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.227000000000086
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3541
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021855036192169306
          cur_lr: 5.000000000000001e-05
          entropy: 0.2284640360209677
          entropy_coeff: 0.009999999999999998
          kl: 0.026980993156492
          policy_loss: 0.02570887506008148
          total_loss: 0.7971644931369357
          vf_explained_var: 0.4417424499988556
          vf_loss: 0.7731505874130461
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained: 1062000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1062,29472.7,1062000,-25.227,-21.3,-32.8,252.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-10-22_03-57-25
  done: false
  episode_len_mean: 251.04
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.104000000000088
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3545
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03278255428825396
          cur_lr: 5.000000000000001e-05
          entropy: 0.16510712256034216
          entropy_coeff: 0.009999999999999998
          kl: 0.005314438974122342
          policy_loss: -0.013741464912891388
          total_loss: 1.0113962643676333
          vf_explained_var: 0.324523389339447
          vf_loss: 1.0266145745913187
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_trained: 10630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1063,29501.6,1063000,-25.104,-21.3,-32.8,251.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-10-22_03-57-55
  done: false
  episode_len_mean: 249.37
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.937000000000083
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3549
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03278255428825396
          cur_lr: 5.000000000000001e-05
          entropy: 0.18937309839659267
          entropy_coeff: 0.009999999999999998
          kl: 0.012675658695528587
          policy_loss: -0.010994357532925075
          total_loss: 0.7858753992451561
          vf_explained_var: 0.5038641691207886
          vf_loss: 0.7983479373984866
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained: 1064

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1064,29531.1,1064000,-24.937,-21.3,-32.8,249.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-10-22_03-58-24
  done: false
  episode_len_mean: 247.32
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.732000000000074
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 5
  episodes_total: 3554
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03278255428825396
          cur_lr: 5.000000000000001e-05
          entropy: 0.1969438376526038
          entropy_coeff: 0.009999999999999998
          kl: 0.0049769976974384945
          policy_loss: -0.03782686942981349
          total_loss: 1.1154878861374324
          vf_explained_var: 0.37954792380332947
          vf_loss: 1.155121009879642
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained: 10650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1065,29560.4,1065000,-24.732,-21.3,-32.8,247.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-10-22_03-58-53
  done: false
  episode_len_mean: 246.36
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.63600000000008
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3558
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01639127714412698
          cur_lr: 5.000000000000001e-05
          entropy: 0.21608178764581681
          entropy_coeff: 0.009999999999999998
          kl: 0.017635054281752598
          policy_loss: 0.0012578664554489982
          total_loss: 0.7472148418426514
          vf_explained_var: 0.5749749541282654
          vf_loss: 0.7478287405437893
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained: 10660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1066,29588.9,1066000,-24.636,-21.3,-32.8,246.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-10-22_03-59-23
  done: false
  episode_len_mean: 245.81
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.581000000000078
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3562
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01639127714412698
          cur_lr: 5.000000000000001e-05
          entropy: 0.18939113633500204
          entropy_coeff: 0.009999999999999998
          kl: 0.006484630656028006
          policy_loss: 0.027891172303093806
          total_loss: 0.9536934852600097
          vf_explained_var: 0.3846516013145447
          vf_loss: 0.927589926454756
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained: 106700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1067,29618.7,1067000,-24.581,-21.3,-32.8,245.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-10-22_03-59-53
  done: false
  episode_len_mean: 244.89
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.489000000000075
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3566
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01639127714412698
          cur_lr: 5.000000000000001e-05
          entropy: 0.19430642078320184
          entropy_coeff: 0.009999999999999998
          kl: 0.004887865030659968
          policy_loss: 0.02279040954179234
          total_loss: 0.9729265683227115
          vf_explained_var: 0.2698100507259369
          vf_loss: 0.9519991066720751
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_trained: 106800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1068,29648.9,1068000,-24.489,-21.3,-32.8,244.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-10-22_04-00-39
  done: false
  episode_len_mean: 244.26
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.426000000000077
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3570
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00819563857206349
          cur_lr: 5.000000000000001e-05
          entropy: 0.1649083779917823
          entropy_coeff: 0.009999999999999998
          kl: 0.0067781611791398666
          policy_loss: 0.017125813331868914
          total_loss: 0.795672078927358
          vf_explained_var: 0.41791194677352905
          vf_loss: 0.7801397932900322
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 10690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1069,29695.1,1069000,-24.426,-21.3,-32.8,244.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-10-22_04-01-10
  done: false
  episode_len_mean: 243.59
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.35900000000008
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3574
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00819563857206349
          cur_lr: 5.000000000000001e-05
          entropy: 0.14674833358989822
          entropy_coeff: 0.009999999999999998
          kl: 0.005572486345238145
          policy_loss: -0.02659354019496176
          total_loss: 1.0626399940914577
          vf_explained_var: 0.321512907743454
          vf_loss: 1.0906553347905477
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained: 1070000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1070,29726.4,1070000,-24.359,-21.3,-32.8,243.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-10-22_04-01-40
  done: false
  episode_len_mean: 243.24
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.324000000000073
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 5
  episodes_total: 3579
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00819563857206349
          cur_lr: 5.000000000000001e-05
          entropy: 0.2941967113978333
          entropy_coeff: 0.009999999999999998
          kl: 0.050407768043570694
          policy_loss: -0.0106563581360711
          total_loss: 0.9071750240193472
          vf_explained_var: 0.6400957107543945
          vf_loss: 0.9203602135181427
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 1071000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1071,29755.9,1071000,-24.324,-21.3,-32.8,243.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-10-22_04-02-07
  done: false
  episode_len_mean: 244.1
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.41000000000008
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 3
  episodes_total: 3582
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012293457858095241
          cur_lr: 5.000000000000001e-05
          entropy: 0.4577597730689579
          entropy_coeff: 0.009999999999999998
          kl: 0.023568507339895937
          policy_loss: 0.013387360465195444
          total_loss: 0.6200319399436315
          vf_explained_var: 0.31005993485450745
          vf_loss: 0.6109324398967955
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 107200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1072,29783.1,1072000,-24.41,-21.3,-32.8,244.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-10-22_04-02-33
  done: false
  episode_len_mean: 245.27
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.527000000000076
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3586
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01844018678714285
          cur_lr: 5.000000000000001e-05
          entropy: 0.5026952624320984
          entropy_coeff: 0.009999999999999998
          kl: 0.004687712986281964
          policy_loss: 0.03037786032590601
          total_loss: 1.5498498254352147
          vf_explained_var: 0.0435141921043396
          vf_loss: 1.5244124836391872
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_trained: 1073000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1073,29809.3,1073000,-24.527,-21.3,-32.8,245.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-10-22_04-03-00
  done: false
  episode_len_mean: 246.55
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.655000000000083
  episode_reward_min: -32.800000000000196
  episodes_this_iter: 4
  episodes_total: 3590
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009220093393571426
          cur_lr: 5.000000000000001e-05
          entropy: 0.5248505350616243
          entropy_coeff: 0.009999999999999998
          kl: 0.004178881975473993
          policy_loss: 0.0035331453714105817
          total_loss: 1.5759641554620532
          vf_explained_var: 0.02575065568089485
          vf_loss: 1.5776409811443752
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_trained: 107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1074,29835.7,1074000,-24.655,-21.3,-32.8,246.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-10-22_04-03-22
  done: false
  episode_len_mean: 249.06
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.906000000000084
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 3
  episodes_total: 3593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004610046696785713
          cur_lr: 5.000000000000001e-05
          entropy: 0.4893941084543864
          entropy_coeff: 0.009999999999999998
          kl: 0.052754022332599404
          policy_loss: 0.048715463280677794
          total_loss: 1.1319581422540876
          vf_explained_var: -0.2295808345079422
          vf_loss: 1.0878934118482801
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_trained: 10750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1075,29858,1075000,-24.906,-21.3,-34,249.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-10-22_04-03-51
  done: false
  episode_len_mean: 248.28
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.828000000000078
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3597
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006915070045178569
          cur_lr: 5.000000000000001e-05
          entropy: 0.5382579849825965
          entropy_coeff: 0.009999999999999998
          kl: 0.010515192868502905
          policy_loss: 0.06772182989451621
          total_loss: 0.8705961538685693
          vf_explained_var: 0.6056473255157471
          vf_loss: 0.8081841945648194
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained: 1076000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1076,29887.3,1076000,-24.828,-21.3,-34,248.28




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-10-22_04-04-40
  done: false
  episode_len_mean: 247.15
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.71500000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3601
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006915070045178569
          cur_lr: 5.000000000000001e-05
          entropy: 0.5285237931542927
          entropy_coeff: 0.009999999999999998
          kl: 0.023396001679538852
          policy_loss: 0.058158899926477006
          total_loss: 1.0712631901105245
          vf_explained_var: 0.4143040180206299
          vf_loss: 1.0182277348306443
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 1077000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1077,29935.9,1077000,-24.715,-21.1,-34,247.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-10-22_04-05-10
  done: false
  episode_len_mean: 246.91
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.69100000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3605
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010372605067767854
          cur_lr: 5.000000000000001e-05
          entropy: 0.5922873891062207
          entropy_coeff: 0.009999999999999998
          kl: 0.048363733405748774
          policy_loss: -0.06799043847454919
          total_loss: 1.2341327084435356
          vf_explained_var: 0.2807064950466156
          vf_loss: 1.307544351948632
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained: 1078000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1078,29965.9,1078000,-24.691,-21.1,-34,246.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-10-22_04-05-40
  done: false
  episode_len_mean: 246.92
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.69200000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3609
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015558907601651784
          cur_lr: 5.000000000000001e-05
          entropy: 0.4878531899717119
          entropy_coeff: 0.009999999999999998
          kl: 0.004576236050567909
          policy_loss: -0.10229958035051823
          total_loss: 0.8203142017126084
          vf_explained_var: 0.5714278817176819
          vf_loss: 0.9274211095439063
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained: 1079000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1079,29996.1,1079000,-24.692,-21.1,-34,246.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-10-22_04-06-09
  done: false
  episode_len_mean: 247.23
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.723000000000074
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 5
  episodes_total: 3614
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007779453800825892
          cur_lr: 5.000000000000001e-05
          entropy: 0.47448348369863297
          entropy_coeff: 0.009999999999999998
          kl: 0.01936529737098201
          policy_loss: 0.016601369778315226
          total_loss: 0.7268345250023736
          vf_explained_var: 0.6730525493621826
          vf_loss: 0.7148273441526625
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_trained: 1080000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1080,30025.1,1080000,-24.723,-21.1,-34,247.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-10-22_04-06-38
  done: false
  episode_len_mean: 247.64
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.764000000000074
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3618
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007779453800825892
          cur_lr: 5.000000000000001e-05
          entropy: 0.45066639218065474
          entropy_coeff: 0.009999999999999998
          kl: 0.026575748825504775
          policy_loss: 0.03431133478879929
          total_loss: 0.679541489150789
          vf_explained_var: 0.6423547267913818
          vf_loss: 0.6495300690333049
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_trained: 1081000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1081,30053.8,1081000,-24.764,-21.1,-34,247.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-10-22_04-07-06
  done: false
  episode_len_mean: 248.0
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.80000000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3622
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011669180701238839
          cur_lr: 5.000000000000001e-05
          entropy: 0.43495545519722834
          entropy_coeff: 0.009999999999999998
          kl: 0.03664752821003516
          policy_loss: 0.010734047575129402
          total_loss: 0.8164185689555274
          vf_explained_var: 0.4474014341831207
          vf_loss: 0.8096064216560788
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 1082000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1082,30082.1,1082000,-24.8,-21.1,-34,248


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-10-22_04-07-36
  done: false
  episode_len_mean: 248.36
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.83600000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3626
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01750377105185826
          cur_lr: 5.000000000000001e-05
          entropy: 0.30240527937809625
          entropy_coeff: 0.009999999999999998
          kl: 0.003809829116916161
          policy_loss: 0.009398209841714964
          total_loss: 0.8371991707219018
          vf_explained_var: 0.46105095744132996
          vf_loss: 0.8307583292325338
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained: 1083000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1083,30111.3,1083000,-24.836,-21.1,-34,248.36




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-10-22_04-08-21
  done: false
  episode_len_mean: 248.42
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.84200000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3630
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00875188552592913
          cur_lr: 5.000000000000001e-05
          entropy: 0.3584287186463674
          entropy_coeff: 0.009999999999999998
          kl: 0.0158097641454974
          policy_loss: 0.012705788016319275
          total_loss: 0.7430727276537153
          vf_explained_var: 0.5240377187728882
          vf_loss: 0.7338128639592065
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_trained: 1084000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1084,30156.8,1084000,-24.842,-21.1,-34,248.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-10-22_04-08-51
  done: false
  episode_len_mean: 248.45
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.84500000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3634
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00875188552592913
          cur_lr: 5.000000000000001e-05
          entropy: 0.32701728923453227
          entropy_coeff: 0.009999999999999998
          kl: 0.02305471236023114
          policy_loss: 0.03467581942677498
          total_loss: 0.8251411616802216
          vf_explained_var: 0.5051552653312683
          vf_loss: 0.7935337450769212
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_trained: 1085000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1085,30186.7,1085000,-24.845,-21.1,-34,248.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-10-22_04-09-19
  done: false
  episode_len_mean: 247.61
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.761000000000084
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3638
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013127828288893697
          cur_lr: 5.000000000000001e-05
          entropy: 0.2531161934137344
          entropy_coeff: 0.009999999999999998
          kl: 0.0034259266160772328
          policy_loss: 0.016932160407304764
          total_loss: 0.7795086668597327
          vf_explained_var: 0.500784158706665
          vf_loss: 0.7650626891189152
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained: 1086000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1086,30214.8,1086000,-24.761,-21.1,-34,247.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-10-22_04-09-46
  done: false
  episode_len_mean: 248.14
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.814000000000078
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3642
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006563914144446848
          cur_lr: 5.000000000000001e-05
          entropy: 0.32507585171196196
          entropy_coeff: 0.009999999999999998
          kl: 0.012834798635858707
          policy_loss: 0.011090032094054752
          total_loss: 0.9035053299532996
          vf_explained_var: 0.3609083890914917
          vf_loss: 0.8955818123287624
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_trained: 108700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1087,30241.7,1087000,-24.814,-21.1,-34,248.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-10-22_04-10-17
  done: false
  episode_len_mean: 247.93
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.793000000000077
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3646
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006563914144446848
          cur_lr: 5.000000000000001e-05
          entropy: 0.15013065015276272
          entropy_coeff: 0.009999999999999998
          kl: 0.002143203567354135
          policy_loss: 0.014426718486679925
          total_loss: 0.9775810698668163
          vf_explained_var: 0.345905065536499
          vf_loss: 0.9646415895885891
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_trained: 1088000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1088,30272.5,1088000,-24.793,-21.1,-34,247.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-10-22_04-10-45
  done: false
  episode_len_mean: 248.29
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.82900000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3650
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003281957072223424
          cur_lr: 5.000000000000001e-05
          entropy: 0.18208952926927144
          entropy_coeff: 0.009999999999999998
          kl: 0.01051761165153589
          policy_loss: 0.043233653820223275
          total_loss: 0.9887074020173815
          vf_explained_var: 0.40213748812675476
          vf_loss: 0.9472601241535611
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trained: 1089000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1089,30300.7,1089000,-24.829,-21.1,-34,248.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-10-22_04-11-15
  done: false
  episode_len_mean: 248.3
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.830000000000076
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3654
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003281957072223424
          cur_lr: 5.000000000000001e-05
          entropy: 0.1506042165060838
          entropy_coeff: 0.009999999999999998
          kl: 0.003752179673289804
          policy_loss: 0.010285337766011557
          total_loss: 0.9293997546037038
          vf_explained_var: 0.48879390954971313
          vf_loss: 0.9206081469853719
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_trained: 1090000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1090,30330.8,1090000,-24.83,-21.1,-34,248.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-10-22_04-11-46
  done: false
  episode_len_mean: 247.75
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.77500000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 5
  episodes_total: 3659
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001640978536111712
          cur_lr: 5.000000000000001e-05
          entropy: 0.16407598308804963
          entropy_coeff: 0.009999999999999998
          kl: 0.022073409482112224
          policy_loss: -0.0008222011228402455
          total_loss: 1.1574758211771647
          vf_explained_var: 0.4609900116920471
          vf_loss: 1.1599025547504425
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained: 10910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1091,30362,1091000,-24.775,-21.1,-34,247.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-10-22_04-12-38
  done: false
  episode_len_mean: 246.7
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.670000000000083
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3663
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0024614678041675678
          cur_lr: 5.000000000000001e-05
          entropy: 0.07223827321496275
          entropy_coeff: 0.009999999999999998
          kl: 0.0027990230554873967
          policy_loss: 0.042435207217931745
          total_loss: 1.1348381108707852
          vf_explained_var: 0.09914679080247879
          vf_loss: 1.0931184113025665
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_trained: 1092

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1092,30413.9,1092000,-24.67,-18.4,-34,246.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-10-22_04-13-10
  done: false
  episode_len_mean: 246.11
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.611000000000082
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 5
  episodes_total: 3668
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012307339020837839
          cur_lr: 5.000000000000001e-05
          entropy: 0.06345575985809167
          entropy_coeff: 0.009999999999999998
          kl: 0.007259532635982898
          policy_loss: -0.01267177669538392
          total_loss: 1.3786819358666738
          vf_explained_var: 0.10821860283613205
          vf_loss: 1.391979322830836
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_trained: 10930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1093,30445.4,1093000,-24.611,-18.4,-34,246.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-10-22_04-13-42
  done: false
  episode_len_mean: 245.78
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.578000000000085
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3672
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012307339020837839
          cur_lr: 5.000000000000001e-05
          entropy: 0.28175216408239473
          entropy_coeff: 0.009999999999999998
          kl: 0.04891045025608675
          policy_loss: 0.030977318187554677
          total_loss: 1.0303730100393296
          vf_explained_var: 0.2073880285024643
          vf_loss: 1.0021530249052577
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_trained: 109400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1094,30477.1,1094000,-24.578,-18.4,-34,245.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-10-22_04-14-11
  done: false
  episode_len_mean: 245.44
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.54400000000008
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3676
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018461008531256763
          cur_lr: 5.000000000000001e-05
          entropy: 0.11508373216622406
          entropy_coeff: 0.009999999999999998
          kl: 0.004480407672497775
          policy_loss: -0.010656854179170396
          total_loss: 0.5077379362450706
          vf_explained_var: 0.6894654631614685
          vf_loss: 0.5195373598072264
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_trained: 10950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1095,30506.3,1095000,-24.544,-18.4,-34,245.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-10-22_04-14-40
  done: false
  episode_len_mean: 245.11
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.511000000000077
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3680
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009230504265628381
          cur_lr: 5.000000000000001e-05
          entropy: 0.24318773696819942
          entropy_coeff: 0.009999999999999998
          kl: 0.008102816825416899
          policy_loss: -0.04507218499978383
          total_loss: 0.5361062218745549
          vf_explained_var: 0.5035485029220581
          vf_loss: 0.5836028079191844
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_trained: 10960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1096,30535.8,1096000,-24.511,-18.4,-34,245.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-10-22_04-15-10
  done: false
  episode_len_mean: 243.74
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.374000000000077
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 5
  episodes_total: 3685
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009230504265628381
          cur_lr: 5.000000000000001e-05
          entropy: 0.1787750307056639
          entropy_coeff: 0.009999999999999998
          kl: 0.0066333043975025496
          policy_loss: -0.06349548622965813
          total_loss: 0.5593923429648081
          vf_explained_var: 0.5963154435157776
          vf_loss: 0.6246694531705644
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_trained: 10970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1097,30565.2,1097000,-24.374,-18.4,-34,243.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-10-22_04-15-39
  done: false
  episode_len_mean: 242.09
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.209000000000074
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 3689
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009230504265628381
          cur_lr: 5.000000000000001e-05
          entropy: 0.18163121574454838
          entropy_coeff: 0.009999999999999998
          kl: 0.012945180614070134
          policy_loss: -0.011530385331975089
          total_loss: 0.6435344255632824
          vf_explained_var: 0.48764872550964355
          vf_loss: 0.6568691710631053
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1098,30594.2,1098000,-24.209,-18.4,-34,242.09




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-10-22_04-16-26
  done: false
  episode_len_mean: 239.23
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.923000000000073
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3693
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009230504265628381
          cur_lr: 5.000000000000001e-05
          entropy: 0.1351847360531489
          entropy_coeff: 0.009999999999999998
          kl: 0.0020655276494115546
          policy_loss: 0.043462112297614416
          total_loss: 0.7658586488829718
          vf_explained_var: 0.2990289628505707
          vf_loss: 0.7237464792198605
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trained: 10990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1099,30641.1,1099000,-23.923,-18.4,-27.5,239.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-10-22_04-16-56
  done: false
  episode_len_mean: 239.27
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.927000000000067
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3697
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00046152521328141907
          cur_lr: 5.000000000000001e-05
          entropy: 0.1599083030389415
          entropy_coeff: 0.009999999999999998
          kl: 0.008212360205839778
          policy_loss: 0.04620442659490638
          total_loss: 0.8614005612002479
          vf_explained_var: 0.30273011326789856
          vf_loss: 0.8167914178636339
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trained: 11000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1100,30671.4,1100000,-23.927,-18.4,-27.5,239.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-10-22_04-17-23
  done: false
  episode_len_mean: 239.92
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.992000000000075
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3701
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00046152521328141907
          cur_lr: 5.000000000000001e-05
          entropy: 0.22304937889178594
          entropy_coeff: 0.009999999999999998
          kl: 0.0312763498022515
          policy_loss: 0.023400873401098782
          total_loss: 0.952962726354599
          vf_explained_var: 0.2204979807138443
          vf_loss: 0.931777909066942
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained: 1101000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1101,30698.7,1101000,-23.992,-18.4,-27.5,239.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-10-22_04-17-53
  done: false
  episode_len_mean: 240.57
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.057000000000077
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3705
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006922878199221283
          cur_lr: 5.000000000000001e-05
          entropy: 0.2817908823490143
          entropy_coeff: 0.009999999999999998
          kl: 0.01069457708120609
          policy_loss: 0.021118305126825967
          total_loss: 1.0199942198064593
          vf_explained_var: 0.2888982892036438
          vf_loss: 1.0016864193810358
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_trained: 1102000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1102,30728.5,1102000,-24.057,-18.4,-27.5,240.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-10-22_04-18-24
  done: false
  episode_len_mean: 240.7
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.07000000000008
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3709
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006922878199221283
          cur_lr: 5.000000000000001e-05
          entropy: 0.2584164339635107
          entropy_coeff: 0.009999999999999998
          kl: 0.025424393531516643
          policy_loss: 0.0258858366145028
          total_loss: 0.8654028064674801
          vf_explained_var: 0.48710474371910095
          vf_loss: 0.8420835389031305
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_trained: 1103000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1103,30759,1103000,-24.07,-18.4,-27.5,240.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-10-22_04-18-53
  done: false
  episode_len_mean: 240.86
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.086000000000073
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3713
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001038431729883193
          cur_lr: 5.000000000000001e-05
          entropy: 0.14994517515103023
          entropy_coeff: 0.009999999999999998
          kl: 0.0146386295416322
          policy_loss: 0.01979415805803405
          total_loss: 0.9254066142770979
          vf_explained_var: 0.49669408798217773
          vf_loss: 0.9070966892772251
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_trained: 1104000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1104,30788,1104000,-24.086,-18.4,-27.5,240.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-10-22_04-19-23
  done: false
  episode_len_mean: 240.72
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.07200000000007
  episode_reward_min: -27.50000000000012
  episodes_this_iter: 4
  episodes_total: 3717
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001038431729883193
          cur_lr: 5.000000000000001e-05
          entropy: 0.1060668440742625
          entropy_coeff: 0.009999999999999998
          kl: 0.010518368992721131
          policy_loss: -0.08090339907341533
          total_loss: 1.137726288371616
          vf_explained_var: 0.4324033856391907
          vf_loss: 1.2196794304582808
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_trained: 1105000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1105,30817.9,1105000,-24.072,-18.4,-27.5,240.72




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-10-22_04-20-10
  done: false
  episode_len_mean: 240.0
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -24.000000000000068
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 5
  episodes_total: 3722
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001038431729883193
          cur_lr: 5.000000000000001e-05
          entropy: 0.20213338649935192
          entropy_coeff: 0.009999999999999998
          kl: 0.004250760695011498
          policy_loss: -0.010914466033379236
          total_loss: 1.034013729625278
          vf_explained_var: 0.5254367589950562
          vf_loss: 1.0469451235400307
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_trained: 110600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1106,30865.3,1106000,-24,-18.4,-27.2,240


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-10-22_04-20-39
  done: false
  episode_len_mean: 239.82
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.98200000000007
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3726
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005192158649415965
          cur_lr: 5.000000000000001e-05
          entropy: 0.14819219667050573
          entropy_coeff: 0.009999999999999998
          kl: 0.0026675376442724208
          policy_loss: -0.006531133751074473
          total_loss: 0.7877899997764164
          vf_explained_var: 0.5716906785964966
          vf_loss: 0.7958016640610165
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_trained: 110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1107,30894.4,1107000,-23.982,-18.4,-27.2,239.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-10-22_04-21-09
  done: false
  episode_len_mean: 239.62
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.962000000000074
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3730
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00025960793247079825
          cur_lr: 5.000000000000001e-05
          entropy: 0.12069443886478742
          entropy_coeff: 0.009999999999999998
          kl: 0.002851761290984693
          policy_loss: 0.036580089810821746
          total_loss: 1.061449259519577
          vf_explained_var: 0.2910996377468109
          vf_loss: 1.026075377729204
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_trained: 11080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1108,30924.3,1108000,-23.962,-18.4,-27.2,239.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-10-22_04-21-40
  done: false
  episode_len_mean: 239.42
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.94200000000007
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3734
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00012980396623539912
          cur_lr: 5.000000000000001e-05
          entropy: 0.12296469973193275
          entropy_coeff: 0.009999999999999998
          kl: 0.001953479520637242
          policy_loss: 0.021946035656664108
          total_loss: 1.138506672779719
          vf_explained_var: 0.21076638996601105
          vf_loss: 1.1177900380558439
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trained: 1109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1109,30955.4,1109000,-23.942,-18.4,-27.2,239.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-10-22_04-22-10
  done: false
  episode_len_mean: 238.11
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.811000000000067
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 5
  episodes_total: 3739
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.490198311769956e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.17421852631701362
          entropy_coeff: 0.009999999999999998
          kl: 0.006556375147716646
          policy_loss: -0.01545161066783799
          total_loss: 1.533469288216697
          vf_explained_var: 0.23222632706165314
          vf_loss: 1.5506626625855764
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trained: 1110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1110,30985.4,1110000,-23.811,-18.4,-27.2,238.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-10-22_04-22-42
  done: false
  episode_len_mean: 236.86
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.686000000000064
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3743
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.490198311769956e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.12098683011200694
          entropy_coeff: 0.009999999999999998
          kl: 0.00091659077124358
          policy_loss: 0.04516931606663598
          total_loss: 1.332624856630961
          vf_explained_var: 0.024343686178326607
          vf_loss: 1.2886653595500521
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained: 11110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1111,31017.4,1111000,-23.686,-18.4,-27.2,236.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-10-22_04-23-12
  done: false
  episode_len_mean: 236.81
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.681000000000058
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3747
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.245099155884978e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.19617412947118282
          entropy_coeff: 0.009999999999999998
          kl: 0.006869245707941711
          policy_loss: -0.011746550020244386
          total_loss: 1.0171789778603448
          vf_explained_var: 0.42976516485214233
          vf_loss: 1.0308870348665449
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1112,31047.6,1112000,-23.681,-18.4,-27.2,236.81




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-10-22_04-24-02
  done: false
  episode_len_mean: 235.56
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.55600000000006
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 5
  episodes_total: 3752
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.245099155884978e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.25701042968365884
          entropy_coeff: 0.009999999999999998
          kl: 0.014669147817788976
          policy_loss: 0.012066233985953861
          total_loss: 1.6281423038906522
          vf_explained_var: 0.11611968278884888
          vf_loss: 1.6186456845866308
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trained: 1113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1113,31097.2,1113000,-23.556,-18.4,-27.2,235.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-10-22_04-24-30
  done: false
  episode_len_mean: 236.23
  episode_media: {}
  episode_reward_max: -18.39999999999999
  episode_reward_mean: -23.623000000000065
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3756
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.245099155884978e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3530336141586304
          entropy_coeff: 0.009999999999999998
          kl: 0.010014192523678073
          policy_loss: -0.0062185336732202105
          total_loss: 1.2764471160040962
          vf_explained_var: 0.2230967879295349
          vf_loss: 1.28619564904107
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_trained: 11140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1114,31125,1114000,-23.623,-18.4,-27.2,236.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-10-22_04-24-59
  done: false
  episode_len_mean: 237.68
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.768000000000065
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3760
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.245099155884978e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2506058636638853
          entropy_coeff: 0.009999999999999998
          kl: 0.011257252545796506
          policy_loss: -0.006507134603129493
          total_loss: 0.9711839682526059
          vf_explained_var: 0.46390894055366516
          vf_loss: 0.9801967958609263
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_trained: 1115000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1115,31153.8,1115000,-23.768,-19,-27.2,237.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-10-22_04-25-28
  done: false
  episode_len_mean: 237.87
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.78700000000007
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3764
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.245099155884978e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2919140862094031
          entropy_coeff: 0.009999999999999998
          kl: 0.004396014864759066
          policy_loss: 0.04647891533871492
          total_loss: 1.5165545913908216
          vf_explained_var: 0.18405818939208984
          vf_loss: 1.4729946871598563
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_trained: 1116000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1116,31182.9,1116000,-23.787,-19,-27.2,237.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-10-22_04-25-59
  done: false
  episode_len_mean: 238.7
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.870000000000065
  episode_reward_min: -27.200000000000117
  episodes_this_iter: 4
  episodes_total: 3768
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.622549577942489e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.2750471718609333
          entropy_coeff: 0.009999999999999998
          kl: 0.00263313466043308
          policy_loss: 0.023810856209860908
          total_loss: 1.4768916328748067
          vf_explained_var: 0.22691220045089722
          vf_loss: 1.4558312071694268
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_trained: 1117000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1117,31214.4,1117000,-23.87,-19,-27.2,238.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-10-22_04-26-26
  done: false
  episode_len_mean: 239.93
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -23.993000000000066
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3772
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.112747889712445e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4462220284673903
          entropy_coeff: 0.009999999999999998
          kl: 0.02063533816933282
          policy_loss: 0.011120853655868106
          total_loss: 1.0808919396665362
          vf_explained_var: 0.31206828355789185
          vf_loss: 1.0742331306139628
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_trained: 1118000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1118,31240.8,1118000,-23.993,-19,-27.7,239.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-10-22_04-26-54
  done: false
  episode_len_mean: 240.89
  episode_media: {}
  episode_reward_max: -19.0
  episode_reward_mean: -24.08900000000007
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3776
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2169121834568667e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.32491178860267006
          entropy_coeff: 0.009999999999999998
          kl: 0.0719639647486771
          policy_loss: 0.07726817776759466
          total_loss: 1.0962321433756086
          vf_explained_var: 0.5735116600990295
          vf_loss: 1.022212196720971
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_trained: 1119000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1119,31269.4,1119000,-24.089,-19,-27.7,240.89




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-10-22_04-27-41
  done: false
  episode_len_mean: 240.03
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.00300000000007
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3780
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8253682751853e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.1873027948041757
          entropy_coeff: 0.009999999999999998
          kl: 0.002811300068626268
          policy_loss: -0.024478167792161307
          total_loss: 1.7813930259810553
          vf_explained_var: 0.17606540024280548
          vf_loss: 1.8077441718843248
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_trained: 11200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1120,31316.4,1120000,-24.003,-18.8,-27.7,240.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-10-22_04-28-16
  done: false
  episode_len_mean: 239.21
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.921000000000067
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3785
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1268413759265e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.11149457767605782
          entropy_coeff: 0.009999999999999998
          kl: 0.0026614007532893584
          policy_loss: -0.0049655870844920475
          total_loss: 2.0613162239392597
          vf_explained_var: 0.21569953858852386
          vf_loss: 2.0673967295222813
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1121,31351.3,1121000,-23.921,-18.8,-27.7,239.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-10-22_04-28-49
  done: false
  episode_len_mean: 238.27
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.827000000000066
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3790
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.56342068796325e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.12413808579246204
          entropy_coeff: 0.009999999999999998
          kl: 0.0008935977990776599
          policy_loss: 0.035437581688165666
          total_loss: 1.9781292623943754
          vf_explained_var: 0.0252701286226511
          vf_loss: 1.9439330604341296
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_trained: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1122,31383.9,1122000,-23.827,-18.8,-27.7,238.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-10-22_04-29-21
  done: false
  episode_len_mean: 237.29
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.72900000000007
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3794
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.281710343981625e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.18073189953962962
          entropy_coeff: 0.009999999999999998
          kl: 0.00566854424468267
          policy_loss: 0.03656744671364625
          total_loss: 1.693288086520301
          vf_explained_var: 0.017791250720620155
          vf_loss: 1.658527934551239
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trained: 112300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1123,31416.2,1123000,-23.729,-18.8,-27.7,237.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-10-22_04-29-54
  done: false
  episode_len_mean: 236.06
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.606000000000062
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3799
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.281710343981625e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.12141490851839383
          entropy_coeff: 0.009999999999999998
          kl: 0.0014446014649041592
          policy_loss: -0.016647408323155508
          total_loss: 2.1845611492792765
          vf_explained_var: 0.04189446195960045
          vf_loss: 2.2024226943651835
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1124,31449,1124000,-23.606,-18.8,-27.7,236.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-10-22_04-30-24
  done: false
  episode_len_mean: 235.33
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.533000000000065
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3803
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1408551719908124e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.25342785956131086
          entropy_coeff: 0.009999999999999998
          kl: 0.03204972001742629
          policy_loss: 0.01588710778289371
          total_loss: 1.3135627236631182
          vf_explained_var: 0.22936126589775085
          vf_loss: 1.3002098616626527
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_trained: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1125,31478.4,1125000,-23.533,-18.8,-27.7,235.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-10-22_04-30-56
  done: false
  episode_len_mean: 233.93
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.393000000000058
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3808
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7112827579862187e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.07414889617098702
          entropy_coeff: 0.009999999999999998
          kl: 0.001915773664436276
          policy_loss: 0.0055636346340179445
          total_loss: 1.7823262254397074
          vf_explained_var: 0.15317612886428833
          vf_loss: 1.7775040533807542
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1126,31511.3,1126000,-23.393,-18.8,-27.7,233.93




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-10-22_04-31-48
  done: false
  episode_len_mean: 232.51
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.251000000000058
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3812
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.556413789931094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08294841330498456
          entropy_coeff: 0.009999999999999998
          kl: 0.009129943238148128
          policy_loss: 0.004287879996829563
          total_loss: 1.1968153847588434
          vf_explained_var: 0.14325836300849915
          vf_loss: 1.1933569782310063
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1127,31562.8,1127000,-23.251,-18.7,-27.7,232.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-10-22_04-32-22
  done: false
  episode_len_mean: 231.34
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.134000000000057
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3817
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.556413789931094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08176696519884798
          entropy_coeff: 0.009999999999999998
          kl: 0.00913925760575517
          policy_loss: -0.01320113804605272
          total_loss: 2.1258756823009914
          vf_explained_var: 0.0837801918387413
          vf_loss: 2.139894453684489
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_trained: 11280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1128,31596.7,1128000,-23.134,-18.7,-27.7,231.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-10-22_04-32-55
  done: false
  episode_len_mean: 230.16
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.016000000000055
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3822
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.556413789931094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.028329873809383975
          entropy_coeff: 0.009999999999999998
          kl: 0.0014992062816600162
          policy_loss: -0.010324245111809837
          total_loss: 2.050401516755422
          vf_explained_var: 0.04749344661831856
          vf_loss: 2.0610090679592554
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1129,31629.9,1129000,-23.016,-18.7,-27.7,230.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-10-22_04-33-28
  done: false
  episode_len_mean: 229.01
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.901000000000053
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 3826
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.278206894965547e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.021838760655373334
          entropy_coeff: 0.009999999999999998
          kl: 0.0004969504984448875
          policy_loss: 0.04620255016618305
          total_loss: 1.229861670732498
          vf_explained_var: 0.031072115525603294
          vf_loss: 1.1838775042030547
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1130,31663.2,1130000,-22.901,-18.7,-27.7,229.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-10-22_04-34-00
  done: false
  episode_len_mean: 227.89
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.78900000000005
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3831
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1391034474827734e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.018558917867226733
          entropy_coeff: 0.009999999999999998
          kl: 0.014368591865471785
          policy_loss: -0.012082747038867738
          total_loss: 2.019967186450958
          vf_explained_var: 0.052719201892614365
          vf_loss: 2.0322355031967163
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1131,31695.2,1131000,-22.789,-18.7,-27.7,227.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-10-22_04-34-33
  done: false
  episode_len_mean: 227.28
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.728000000000048
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 5
  episodes_total: 3836
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1391034474827734e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.05431931898411777
          entropy_coeff: 0.009999999999999998
          kl: 0.004809015541753138
          policy_loss: -0.007355327324734794
          total_loss: 1.8264463557137383
          vf_explained_var: 0.0885794535279274
          vf_loss: 1.8343448718388875
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1132,31728.1,1132000,-22.728,-18.7,-27.7,227.28




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-10-22_04-35-18
  done: false
  episode_len_mean: 227.96
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.79600000000005
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3840
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0695517237413867e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.2036587180983689
          entropy_coeff: 0.009999999999999998
          kl: 0.01010991810353546
          policy_loss: 0.12865407657292155
          total_loss: 1.0936282879776424
          vf_explained_var: 0.1312398910522461
          vf_loss: 0.9670108155243926
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_trained: 1133000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1133,31772.8,1133000,-22.796,-18.7,-37.3,227.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-10-22_04-35-53
  done: false
  episode_len_mean: 227.63
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.763000000000048
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3844
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0695517237413867e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0395234296305312
          entropy_coeff: 0.009999999999999998
          kl: 0.001738318494788787
          policy_loss: 0.03860294785764482
          total_loss: 1.3527805911170112
          vf_explained_var: 0.019232891499996185
          vf_loss: 1.3145728647708892
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1134,31807.9,1134000,-22.763,-18.7,-37.3,227.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-10-22_04-36-26
  done: false
  episode_len_mean: 226.37
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.637000000000047
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3849
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3477586187069335e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.03564065123597781
          entropy_coeff: 0.009999999999999998
          kl: 0.0007878722261325935
          policy_loss: -0.01316567427582211
          total_loss: 1.951075682375166
          vf_explained_var: 0.03475835919380188
          vf_loss: 1.964597753683726
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1135,31840.4,1135000,-22.637,-18.7,-37.3,226.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-10-22_04-36-59
  done: false
  episode_len_mean: 225.95
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.59500000000005
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3854
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6738793093534668e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.04142278697755602
          entropy_coeff: 0.009999999999999998
          kl: 8.531686319805943e-05
          policy_loss: 0.02604269782702128
          total_loss: 1.6513150612513223
          vf_explained_var: 0.03200609236955643
          vf_loss: 1.6256865859031677
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1136,31873.3,1136000,-22.595,-18.7,-37.3,225.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-10-22_04-37-32
  done: false
  episode_len_mean: 224.2
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.42000000000004
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3858
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3369396546767334e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.040011604709757696
          entropy_coeff: 0.009999999999999998
          kl: 0.0007031187596174801
          policy_loss: 0.046810025804572636
          total_loss: 1.299448643790351
          vf_explained_var: 0.019330618903040886
          vf_loss: 1.2530387534035576
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1137,31906.5,1137000,-22.42,-18.7,-37.3,224.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-10-22_04-38-05
  done: false
  episode_len_mean: 222.82
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.282000000000043
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3863
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.684698273383667e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.04922240695191754
          entropy_coeff: 0.009999999999999998
          kl: 0.00030274363361137486
          policy_loss: -0.013794614374637604
          total_loss: 1.8919531491067674
          vf_explained_var: 0.03607380390167236
          vf_loss: 1.9062399917178683
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1138,31939.4,1138000,-22.282,-18.7,-37.3,222.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-10-22_04-38-38
  done: false
  episode_len_mean: 221.75
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.175000000000036
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3867
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3423491366918335e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.04396120740307702
          entropy_coeff: 0.009999999999999998
          kl: 0.00016334654712006407
          policy_loss: -0.013877305678195423
          total_loss: 1.4350598401493497
          vf_explained_var: 0.022222157567739487
          vf_loss: 1.4493767632378471
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1139,31972.1,1139000,-22.175,-18.7,-37.3,221.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-10-22_04-39-29
  done: false
  episode_len_mean: 219.76
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.97600000000004
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3872
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6711745683459167e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0626793087977502
          entropy_coeff: 0.009999999999999998
          kl: 0.0033815544461061916
          policy_loss: 0.01998597275879648
          total_loss: 1.6991672608587476
          vf_explained_var: 0.08912355452775955
          vf_loss: 1.679808078871833
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_trained: 11400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1140,32023.9,1140000,-21.976,-18.7,-37.3,219.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-10-22_04-40-01
  done: false
  episode_len_mean: 217.59
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.759000000000043
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3877
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.355872841729584e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.03978135677882367
          entropy_coeff: 0.009999999999999998
          kl: 0.0054886034293928155
          policy_loss: -0.008213360276487138
          total_loss: 1.8410113116105398
          vf_explained_var: 0.2008502185344696
          vf_loss: 1.8496224761009217
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1141,32055.9,1141000,-21.759,-18.7,-37.3,217.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-10-22_04-40-34
  done: false
  episode_len_mean: 217.34
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.73400000000004
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3882
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.355872841729584e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.07328410622042915
          entropy_coeff: 0.009999999999999998
          kl: 0.032635579748268466
          policy_loss: 0.0462425677312745
          total_loss: 1.5757006155120001
          vf_explained_var: 0.10665489733219147
          vf_loss: 1.5301908903651766
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained: 114200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1142,32088.7,1142000,-21.734,-18.7,-37.3,217.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-10-22_04-41-07
  done: false
  episode_len_mean: 217.26
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.72600000000004
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3886
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2533809262594376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.04437476222713788
          entropy_coeff: 0.009999999999999998
          kl: 0.0033201754735994625
          policy_loss: 0.043454315678940876
          total_loss: 1.182609584596422
          vf_explained_var: 0.2192060500383377
          vf_loss: 1.1395990272363028
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trained: 1143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1143,32121.9,1143000,-21.726,-18.7,-37.3,217.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-10-22_04-41-40
  done: false
  episode_len_mean: 217.12
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.71200000000004
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3891
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.266904631297188e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.023754430438081422
          entropy_coeff: 0.009999999999999998
          kl: 0.0003335285200680321
          policy_loss: -0.014492846114767923
          total_loss: 1.8791811426480611
          vf_explained_var: 0.04488920792937279
          vf_loss: 1.8939115153418646
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1144,32154.5,1144000,-21.712,-18.7,-37.3,217.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-10-22_04-42-13
  done: false
  episode_len_mean: 216.93
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.693000000000037
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3896
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.133452315648594e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.021858933547304737
          entropy_coeff: 0.009999999999999998
          kl: 0.00027400560421466166
          policy_loss: 0.040959333835376636
          total_loss: 1.5509489019711813
          vf_explained_var: 0.03314158692955971
          vf_loss: 1.5102081722683376
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1145,32187.1,1145000,-21.693,-18.7,-37.3,216.93




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-10-22_04-43-03
  done: false
  episode_len_mean: 216.58
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.658000000000037
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3900
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.566726157824297e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.02079685375922256
          entropy_coeff: 0.009999999999999998
          kl: 0.00045048782621102864
          policy_loss: 0.010699202617009481
          total_loss: 1.3018087095684476
          vf_explained_var: 0.020343031734228134
          vf_loss: 1.291317461596595
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1146,32237.5,1146000,-21.658,-18.7,-37.3,216.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-10-22_04-43-37
  done: false
  episode_len_mean: 215.62
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.56200000000003
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3905
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.833630789121485e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.048930541550119715
          entropy_coeff: 0.009999999999999998
          kl: 0.004455035689550612
          policy_loss: 0.017611573305394916
          total_loss: 1.5147104448742337
          vf_explained_var: 0.1433153748512268
          vf_loss: 1.4975881695747375
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trained: 1147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1147,32271.4,1147000,-21.562,-18.7,-37.3,215.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-10-22_04-44-10
  done: false
  episode_len_mean: 215.85
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.585000000000036
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3910
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9168153945607427e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.02923758744986521
          entropy_coeff: 0.009999999999999998
          kl: 0.0010627451939755907
          policy_loss: -0.00023361461030112373
          total_loss: 1.7184642765257094
          vf_explained_var: 0.2021787464618683
          vf_loss: 1.7189902676476372
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1148,32304.2,1148000,-21.585,-18.7,-37.3,215.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-10-22_04-44-43
  done: false
  episode_len_mean: 215.79
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.579000000000036
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3914
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9584076972803713e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0370853276612858
          entropy_coeff: 0.009999999999999998
          kl: 0.0022499906225108804
          policy_loss: 0.009797046788864665
          total_loss: 1.4965097890959846
          vf_explained_var: 0.02054399438202381
          vf_loss: 1.4870835860570273
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1149,32337.7,1149000,-21.579,-18.7,-37.3,215.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-10-22_04-45-16
  done: false
  episode_len_mean: 215.78
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.578000000000035
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3919
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.792038486401857e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.16504129014081426
          entropy_coeff: 0.009999999999999998
          kl: 0.038931019821269154
          policy_loss: -0.03568683494296339
          total_loss: 1.2766242543856303
          vf_explained_var: 0.36488813161849976
          vf_loss: 1.3139614992671542
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1150,32370.4,1150000,-21.578,-18.7,-37.3,215.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-10-22_04-45-49
  done: false
  episode_len_mean: 215.81
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.58100000000003
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3924
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.468805772960278e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.09259336433476872
          entropy_coeff: 0.009999999999999998
          kl: 0.0009961650419533501
          policy_loss: -0.00552155367202229
          total_loss: 1.355950587325626
          vf_explained_var: 0.07980059832334518
          vf_loss: 1.3623980641365052
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_trained: 1151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1151,32403,1151000,-21.581,-18.7,-37.3,215.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-10-22_04-46-22
  done: false
  episode_len_mean: 215.85
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.585000000000036
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3928
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.34402886480139e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.08819105488558611
          entropy_coeff: 0.009999999999999998
          kl: 0.010059945192765025
          policy_loss: 0.06872993442747329
          total_loss: 0.7818719834089279
          vf_explained_var: 0.14515811204910278
          vf_loss: 0.7140239602989621
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_trained: 11520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1152,32436.5,1152000,-21.585,-18.7,-37.3,215.85




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-10-22_04-47-12
  done: false
  episode_len_mean: 215.6
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.56000000000003
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3933
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.34402886480139e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.08440146239267456
          entropy_coeff: 0.009999999999999998
          kl: 0.0034532560810255574
          policy_loss: -0.009616734666956795
          total_loss: 1.5053963647948372
          vf_explained_var: 0.09662172198295593
          vf_loss: 1.5158570965131124
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_trained: 1153

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1153,32486.7,1153000,-21.56,-18.7,-37.3,215.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-10-22_04-47-46
  done: false
  episode_len_mean: 215.68
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.568000000000033
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 5
  episodes_total: 3938
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.672014432400695e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.03932727523561981
          entropy_coeff: 0.009999999999999998
          kl: 0.0005107999260593527
          policy_loss: -0.04967625211510393
          total_loss: 1.5482706348101298
          vf_explained_var: 0.04663790017366409
          vf_loss: 1.5983401483959623
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1154,32520.7,1154000,-21.568,-18.7,-37.3,215.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-10-22_04-48-19
  done: false
  episode_len_mean: 214.38
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.43800000000003
  episode_reward_min: -22.100000000000044
  episodes_this_iter: 4
  episodes_total: 3942
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8360072162003476e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.03785653462012609
          entropy_coeff: 0.009999999999999998
          kl: 0.0009746375588539241
          policy_loss: 0.03285528413123555
          total_loss: 1.2582588367991978
          vf_explained_var: 0.03297848626971245
          vf_loss: 1.2257821202278136
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1155,32553.4,1155000,-21.438,-18.7,-22.1,214.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-10-22_04-48-52
  done: false
  episode_len_mean: 214.37
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.43700000000003
  episode_reward_min: -22.100000000000044
  episodes_this_iter: 5
  episodes_total: 3947
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.180036081001738e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.03715206390867631
          entropy_coeff: 0.009999999999999998
          kl: 0.00012000668445201078
          policy_loss: -0.0124911453988817
          total_loss: 1.6793303887049358
          vf_explained_var: 0.0437026284635067
          vf_loss: 1.6921930538283454
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1156,32586.4,1156000,-21.437,-18.7,-22.1,214.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-10-22_04-49-25
  done: false
  episode_len_mean: 214.38
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.43800000000003
  episode_reward_min: -22.100000000000044
  episodes_this_iter: 5
  episodes_total: 3952
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.590018040500869e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.06216226352585687
          entropy_coeff: 0.009999999999999998
          kl: 0.0009544952654534811
          policy_loss: 0.038592979767256314
          total_loss: 1.3188998765415616
          vf_explained_var: 0.08529139310121536
          vf_loss: 1.2809285071161058
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1157,32618.9,1157000,-21.438,-18.7,-22.1,214.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-10-22_04-49-57
  done: false
  episode_len_mean: 214.47
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.447000000000024
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 3956
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2950090202504345e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0680093788024452
          entropy_coeff: 0.009999999999999998
          kl: 0.012442373323287646
          policy_loss: 0.03740478017263942
          total_loss: 1.0087838100062476
          vf_explained_var: 0.29780805110931396
          vf_loss: 0.9720591253704495
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1158,32651.2,1158000,-21.447,-18.7,-22.2,214.47




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-10-22_04-50-49
  done: false
  episode_len_mean: 214.15
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.41500000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3961
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2950090202504345e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.10940350159588787
          entropy_coeff: 0.009999999999999998
          kl: 0.03859410695850285
          policy_loss: -0.00917951539158821
          total_loss: 1.6304080327351889
          vf_explained_var: 0.17586243152618408
          vf_loss: 1.6406815767288208
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1159,32702.7,1159000,-21.415,-18.6,-22.2,214.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-10-22_04-51-21
  done: false
  episode_len_mean: 214.12
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.41200000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3966
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4425135303756523e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.054649736773636604
          entropy_coeff: 0.009999999999999998
          kl: 0.0005489816934432746
          policy_loss: 0.026316159301333956
          total_loss: 1.4240797956784566
          vf_explained_var: 0.04748491197824478
          vf_loss: 1.3983101301723055
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1160,32735.2,1160000,-21.412,-18.6,-22.2,214.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-10-22_04-51-54
  done: false
  episode_len_mean: 214.38
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.43800000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 3970
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7212567651878261e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.06488942744003402
          entropy_coeff: 0.009999999999999998
          kl: 0.00176023552695947
          policy_loss: 0.04619700246387058
          total_loss: 1.1915376861890157
          vf_explained_var: 0.02634314075112343
          vf_loss: 1.1459895796245998
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_trained: 1161

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1161,32768.4,1161000,-21.438,-18.6,-22.2,214.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-10-22_04-52-27
  done: false
  episode_len_mean: 214.35
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.435000000000027
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3975
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.606283825939131e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.11509335529473093
          entropy_coeff: 0.009999999999999998
          kl: 0.013016019459626726
          policy_loss: -0.01543639823794365
          total_loss: 1.6127725429005093
          vf_explained_var: 0.10249722748994827
          vf_loss: 1.6293598651885985
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1162,32801.4,1162000,-21.435,-18.6,-22.2,214.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-10-22_04-53-00
  done: false
  episode_len_mean: 214.43
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.44300000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 3979
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.606283825939131e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.10360541095336279
          entropy_coeff: 0.009999999999999998
          kl: 0.0005671731848429889
          policy_loss: -0.012773667648434638
          total_loss: 1.3064637382825215
          vf_explained_var: 0.02371601015329361
          vf_loss: 1.3202734602822197
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1163,32834,1163000,-21.443,-18.6,-22.2,214.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-10-22_04-53-33
  done: false
  episode_len_mean: 214.46
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.44600000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3984
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3031419129695653e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.08489797429905997
          entropy_coeff: 0.009999999999999998
          kl: 0.0005908102122497717
          policy_loss: 0.008690169039699767
          total_loss: 1.6794388135274252
          vf_explained_var: 0.026940366253256798
          vf_loss: 1.671597625149621
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1164,32866.8,1164000,-21.446,-18.6,-22.2,214.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-10-22_04-54-06
  done: false
  episode_len_mean: 214.54
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.45400000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3989
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1515709564847827e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.07841025615731875
          entropy_coeff: 0.009999999999999998
          kl: 0.0003934166413495631
          policy_loss: -0.0011155020859506396
          total_loss: 1.6973077098528544
          vf_explained_var: 0.040321722626686096
          vf_loss: 1.6992073350482517
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1165,32900,1165000,-21.454,-18.6,-22.2,214.54




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-10-22_04-54-58
  done: false
  episode_len_mean: 214.34
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.434000000000033
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 3993
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0757854782423913e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.12324885800480842
          entropy_coeff: 0.009999999999999998
          kl: 0.002932303439534915
          policy_loss: -0.02293084462483724
          total_loss: 1.2135083900557624
          vf_explained_var: 0.16372054815292358
          vf_loss: 1.237671724955241
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1166,32951.6,1166000,-21.434,-18.6,-22.2,214.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-10-22_04-55-31
  done: false
  episode_len_mean: 214.48
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.448000000000032
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 3998
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.378927391211957e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.16195207635561626
          entropy_coeff: 0.009999999999999998
          kl: 0.01207722642212629
          policy_loss: 0.027890202071931627
          total_loss: 1.2058808796935612
          vf_explained_var: 0.21429286897182465
          vf_loss: 1.179610193769137
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_trained: 1167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1167,32984.5,1167000,-21.448,-18.6,-22.2,214.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-10-22_04-56-05
  done: false
  episode_len_mean: 214.77
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.47700000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4003
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.378927391211957e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.1509756532808145
          entropy_coeff: 0.009999999999999998
          kl: 0.007074379054997356
          policy_loss: -0.013296890258789062
          total_loss: 1.8119929287168715
          vf_explained_var: 0.03365639969706535
          vf_loss: 1.8267995675404867
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1168,33018.7,1168000,-21.477,-18.6,-22.2,214.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-10-22_04-56-38
  done: false
  episode_len_mean: 214.78
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.478000000000034
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 4007
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.378927391211957e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.1423594582411978
          entropy_coeff: 0.009999999999999998
          kl: 0.0002499548069924313
          policy_loss: -0.04615090530779627
          total_loss: 1.3871560361650255
          vf_explained_var: 0.022836096584796906
          vf_loss: 1.4347305271360609
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1169,33051.9,1169000,-21.478,-18.6,-22.2,214.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-10-22_04-57-10
  done: false
  episode_len_mean: 214.83
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.483000000000033
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4012
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6894636956059783e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.11458374849624103
          entropy_coeff: 0.009999999999999998
          kl: 0.0010160559787736013
          policy_loss: -0.004053045064210892
          total_loss: 1.8390994522306654
          vf_explained_var: 0.0336405411362648
          vf_loss: 1.8442983481619093
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1170,33084.2,1170000,-21.483,-18.6,-22.2,214.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-10-22_04-57-43
  done: false
  episode_len_mean: 214.84
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.484000000000034
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4017
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3447318478029892e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.12191985969742139
          entropy_coeff: 0.009999999999999998
          kl: 0.0035476028417807527
          policy_loss: -0.011633551534679201
          total_loss: 1.7407543831401402
          vf_explained_var: 0.061070676892995834
          vf_loss: 1.75360715786616
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1171,33116.8,1171000,-21.484,-18.6,-22.2,214.84




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-10-22_04-58-34
  done: false
  episode_len_mean: 214.55
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.455000000000037
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 4021
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.723659239014946e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.12634153622719976
          entropy_coeff: 0.009999999999999998
          kl: 0.0006278612675464911
          policy_loss: -0.018378757602638667
          total_loss: 1.3661140547858344
          vf_explained_var: 0.02625182457268238
          vf_loss: 1.3857562449243335
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1172,33167.8,1172000,-21.455,-18.6,-22.2,214.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-10-22_04-59-07
  done: false
  episode_len_mean: 214.5
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.450000000000028
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4026
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.361829619507473e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.13517384628454845
          entropy_coeff: 0.009999999999999998
          kl: 0.000625145037446965
          policy_loss: 0.014545444233549965
          total_loss: 1.650820677810245
          vf_explained_var: 0.029241686686873436
          vf_loss: 1.6376269777615866
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1173,33200.4,1173000,-21.45,-18.6,-22.2,214.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-10-22_04-59-40
  done: false
  episode_len_mean: 214.77
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.477000000000025
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4031
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6809148097537365e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.1520664064420594
          entropy_coeff: 0.009999999999999998
          kl: 0.004493044427111196
          policy_loss: -0.014524317036072413
          total_loss: 1.774344997935825
          vf_explained_var: 0.037609830498695374
          vf_loss: 1.790389985508389
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1174,33233.4,1174000,-21.477,-18.6,-22.2,214.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-10-22_05-00-13
  done: false
  episode_len_mean: 214.74
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.474000000000032
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 4035
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.404574048768682e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.161164447830783
          entropy_coeff: 0.009999999999999998
          kl: 0.0005721567344340015
          policy_loss: -0.05757628074950642
          total_loss: 1.3439915696779887
          vf_explained_var: 0.023905595764517784
          vf_loss: 1.403179493215349
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1175,33266.7,1175000,-21.474,-18.6,-22.2,214.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-10-22_05-00-45
  done: false
  episode_len_mean: 214.69
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.469000000000033
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4040
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.202287024384341e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.19017877727746962
          entropy_coeff: 0.009999999999999998
          kl: 0.0008332929036744158
          policy_loss: -0.0027690686285495756
          total_loss: 1.8040621333652072
          vf_explained_var: 0.03744179755449295
          vf_loss: 1.8087329811520048
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1176,33298.8,1176000,-21.469,-18.6,-22.2,214.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-10-22_05-01-18
  done: false
  episode_len_mean: 214.71
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.471000000000025
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 5
  episodes_total: 4045
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1011435121921706e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.166526820924547
          entropy_coeff: 0.009999999999999998
          kl: 0.008298400910203204
          policy_loss: -0.011809557179609935
          total_loss: 1.7265517903698815
          vf_explained_var: 0.1334451287984848
          vf_loss: 1.7400266336070167
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1177,33331.9,1177000,-21.471,-18.6,-22.2,214.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-10-22_05-01-52
  done: false
  episode_len_mean: 214.69
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.46900000000003
  episode_reward_min: -22.200000000000045
  episodes_this_iter: 4
  episodes_total: 4049
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1011435121921706e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.2226149885190858
          entropy_coeff: 0.009999999999999998
          kl: 0.05604091599275378
          policy_loss: 0.00886355886856715
          total_loss: 1.1995297325981988
          vf_explained_var: 0.08598775416612625
          vf_loss: 1.1928923282358381
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained: 11780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1178,33365.3,1178000,-21.469,-18.6,-22.2,214.69




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-10-22_05-02-43
  done: false
  episode_len_mean: 214.34
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.43400000000003
  episode_reward_min: -21.80000000000004
  episodes_this_iter: 5
  episodes_total: 4054
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1517152682882554e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.1225238174200058
          entropy_coeff: 0.009999999999999998
          kl: 0.0005815050057084726
          policy_loss: 0.008483825872341791
          total_loss: 1.7063228991296557
          vf_explained_var: 0.04060650244355202
          vf_loss: 1.699064314365387
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_trained: 1179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1179,33416.5,1179000,-21.434,-18.6,-21.8,214.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-10-22_05-03-16
  done: false
  episode_len_mean: 214.34
  episode_media: {}
  episode_reward_max: -18.599999999999994
  episode_reward_mean: -21.43400000000003
  episode_reward_min: -21.80000000000004
  episodes_this_iter: 5
  episodes_total: 4059
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5758576341441277e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.15197982357607948
          entropy_coeff: 0.009999999999999998
          kl: 0.0021617336101798588
          policy_loss: -0.013870538440015581
          total_loss: 1.5628121144241758
          vf_explained_var: 0.21860814094543457
          vf_loss: 1.5782024476263259
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1180,33449.6,1180000,-21.434,-18.6,-21.8,214.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-10-22_05-03-49
  done: false
  episode_len_mean: 214.64
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.464000000000027
  episode_reward_min: -21.80000000000004
  episodes_this_iter: 4
  episodes_total: 4063
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.879288170720639e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.15300565974579916
          entropy_coeff: 0.009999999999999998
          kl: 0.00030500279870419715
          policy_loss: -0.04047444073690309
          total_loss: 1.3728985720210605
          vf_explained_var: 0.0219135619699955
          vf_loss: 1.4149030844370525
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1181,33482.4,1181000,-21.464,-18.8,-21.8,214.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-10-22_05-04-23
  done: false
  episode_len_mean: 214.63
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.463000000000033
  episode_reward_min: -21.80000000000004
  episodes_this_iter: 5
  episodes_total: 4068
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.939644085360319e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.13869106736448075
          entropy_coeff: 0.009999999999999998
          kl: 0.0010235177909790573
          policy_loss: -0.005430534482002258
          total_loss: 1.818944091267056
          vf_explained_var: 0.033745989203453064
          vf_loss: 1.825761542055342
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1182,33517.1,1182000,-21.463,-18.8,-21.8,214.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-10-22_05-04-56
  done: false
  episode_len_mean: 214.61
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.461000000000027
  episode_reward_min: -21.80000000000004
  episodes_this_iter: 5
  episodes_total: 4073
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9698220426801596e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.1639536009894477
          entropy_coeff: 0.009999999999999998
          kl: 0.0007489858054605965
          policy_loss: -0.0159405245549149
          total_loss: 1.7148494031694201
          vf_explained_var: 0.045687198638916016
          vf_loss: 1.7324294341935051
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1183,33549.2,1183000,-21.461,-18.8,-21.8,214.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-10-22_05-05-29
  done: false
  episode_len_mean: 214.73
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.473000000000027
  episode_reward_min: -22.900000000000055
  episodes_this_iter: 4
  episodes_total: 4077
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.849110213400798e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.2944226756691933
          entropy_coeff: 0.009999999999999998
          kl: 0.04523967631470733
          policy_loss: 0.06160485156708294
          total_loss: 0.7829108423656888
          vf_explained_var: 0.3987543284893036
          vf_loss: 0.724250215291977
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_trained: 1184000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1184,33582.2,1184000,-21.473,-18.8,-22.9,214.73




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-10-22_05-06-18
  done: false
  episode_len_mean: 214.65
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.465000000000032
  episode_reward_min: -22.900000000000055
  episodes_this_iter: 5
  episodes_total: 4082
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4773665320101198e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.3705714401271608
          entropy_coeff: 0.009999999999999998
          kl: 0.019136698257119947
          policy_loss: -0.019102835241291256
          total_loss: 0.7245884676774342
          vf_explained_var: 0.4134872853755951
          vf_loss: 0.7473970154921213
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1185,33631.9,1185000,-21.465,-18.8,-22.9,214.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-10-22_05-06-50
  done: false
  episode_len_mean: 215.08
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.508000000000038
  episode_reward_min: -23.20000000000006
  episodes_this_iter: 4
  episodes_total: 4086
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4773665320101198e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.620961762799157
          entropy_coeff: 0.009999999999999998
          kl: 0.02707266815971394
          policy_loss: -0.036227373778820036
          total_loss: 0.6109460304180782
          vf_explained_var: 0.6311541199684143
          vf_loss: 0.6533830278449588
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained: 11860

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1186,33663.2,1186000,-21.508,-18.8,-23.2,215.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-10-22_05-07-20
  done: false
  episode_len_mean: 216.36
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.63600000000003
  episode_reward_min: -24.60000000000008
  episodes_this_iter: 5
  episodes_total: 4091
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.21604979801518e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.6595100038581424
          entropy_coeff: 0.009999999999999998
          kl: 0.032988799164233897
          policy_loss: 0.03276197049352858
          total_loss: 0.33898047655820845
          vf_explained_var: 0.8758698105812073
          vf_loss: 0.31281360420915816
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_trained: 118700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1187,33693.7,1187000,-21.636,-18.8,-24.6,216.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-10-22_05-07-49
  done: false
  episode_len_mean: 217.39
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.739000000000043
  episode_reward_min: -24.60000000000008
  episodes_this_iter: 4
  episodes_total: 4095
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3240746970227693e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.727116807964113
          entropy_coeff: 0.009999999999999998
          kl: 0.01766275957834051
          policy_loss: -0.04411550097995334
          total_loss: 0.28799327959616977
          vf_explained_var: 0.7985098361968994
          vf_loss: 0.33937994539737704
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_trained: 1188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1188,33722.8,1188000,-21.739,-18.8,-24.6,217.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-10-22_05-08-17
  done: false
  episode_len_mean: 218.96
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -21.896000000000036
  episode_reward_min: -27.100000000000115
  episodes_this_iter: 4
  episodes_total: 4099
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3240746970227693e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.0250108407603369
          entropy_coeff: 0.009999999999999998
          kl: 0.043099540828676355
          policy_loss: -0.050380747103028825
          total_loss: 0.29099344627724755
          vf_explained_var: 0.7209390997886658
          vf_loss: 0.3516242980957031
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1189,33750.6,1189000,-21.896,-18.8,-27.1,218.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-10-22_05-08-43
  done: false
  episode_len_mean: 221.13
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.11300000000004
  episode_reward_min: -30.000000000000156
  episodes_this_iter: 3
  episodes_total: 4102
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.986112045534155e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4042741696039835
          entropy_coeff: 0.009999999999999998
          kl: 0.028904345397818235
          policy_loss: 0.06885236551364263
          total_loss: 0.3903967807690302
          vf_explained_var: 0.6913259625434875
          vf_loss: 0.3355871544529994
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained: 119000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1190,33776.5,1190000,-22.113,-18.8,-30,221.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-10-22_05-09-05
  done: false
  episode_len_mean: 224.42
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.442000000000043
  episode_reward_min: -33.60000000000021
  episodes_this_iter: 3
  episodes_total: 4105
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.47916806830123e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.675217542383406
          entropy_coeff: 0.009999999999999998
          kl: 0.021174589625281234
          policy_loss: 0.039034392767482336
          total_loss: 0.49208494987752704
          vf_explained_var: 0.5129092931747437
          vf_loss: 0.46980273582869103
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_trained: 11910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1191,33798.9,1191000,-22.442,-18.8,-33.6,224.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-10-22_05-09-27
  done: false
  episode_len_mean: 228.3
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -22.830000000000055
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 4108
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1218752102451844e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.704290098614163
          entropy_coeff: 0.009999999999999998
          kl: 0.033237601698977985
          policy_loss: 0.0667080389128791
          total_loss: 0.6090006977319717
          vf_explained_var: 0.22672341763973236
          vf_loss: 0.5593355589442783
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_trained: 1192000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1192,33820.6,1192000,-22.83,-18.8,-35.7,228.3




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-10-22_05-10-09
  done: false
  episode_len_mean: 231.15
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.115000000000055
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 4111
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.682812815367777e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5275948418511285
          entropy_coeff: 0.009999999999999998
          kl: 0.02967843419818763
          policy_loss: -0.018608719441625808
          total_loss: 0.6849633975161447
          vf_explained_var: 0.1961691975593567
          vf_loss: 0.7188480648729536
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained: 11930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1193,33862.3,1193000,-23.115,-18.8,-35.7,231.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-10-22_05-10-30
  done: false
  episode_len_mean: 235.12
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.51200000000006
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 4114
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.524219223051666e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.6019928985171847
          entropy_coeff: 0.009999999999999998
          kl: 0.016300710312556057
          policy_loss: 0.032339762151241305
          total_loss: 0.7654829525285297
          vf_explained_var: 0.2950892448425293
          vf_loss: 0.749163117342525
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_trained: 1194000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1194,33883.9,1194000,-23.512,-18.8,-35.7,235.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-10-22_05-10-51
  done: false
  episode_len_mean: 239.84
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -23.984000000000066
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 4117
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.524219223051666e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.7141302863756815
          entropy_coeff: 0.009999999999999998
          kl: 0.013078906559616459
          policy_loss: 0.05956730834311909
          total_loss: 0.8533776872687869
          vf_explained_var: 0.1396150141954422
          vf_loss: 0.8109516958395641
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_trained: 11950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1195,33904.2,1195000,-23.984,-18.8,-37.6,239.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-10-22_05-11-11
  done: false
  episode_len_mean: 242.8
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.280000000000072
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 2
  episodes_total: 4119
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.524219223051666e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5487227903472052
          entropy_coeff: 0.009999999999999998
          kl: 0.016975988446673067
          policy_loss: -0.08488231549660365
          total_loss: 0.7863632417387433
          vf_explained_var: -0.10399655997753143
          vf_loss: 0.8867327916953299
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1196,33924.7,1196000,-24.28,-18.8,-37.6,242.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-10-22_05-11-33
  done: false
  episode_len_mean: 247.49
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -24.749000000000077
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4122
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.524219223051666e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.4227359626028273
          entropy_coeff: 0.009999999999999998
          kl: 0.03608814957457756
          policy_loss: -0.1080519997411304
          total_loss: 1.0414597498046028
          vf_explained_var: 0.3127322196960449
          vf_loss: 1.163739091820187
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained: 1197000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1197,33946,1197000,-24.749,-18.8,-38.7,247.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-10-22_05-11-55
  done: false
  episode_len_mean: 251.39
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.139000000000088
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4125
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.786328834577498e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5608767906824748
          entropy_coeff: 0.009999999999999998
          kl: 0.012388972211494024
          policy_loss: 0.011336104737387763
          total_loss: 0.7476670900980632
          vf_explained_var: 0.5245355367660522
          vf_loss: 0.7519397446264823
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_trained: 11980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1198,33968.2,1198000,-25.139,-18.8,-38.7,251.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-10-22_05-12-16
  done: false
  episode_len_mean: 255.38
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.538000000000093
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4128
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.786328834577498e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5284529394573636
          entropy_coeff: 0.009999999999999998
          kl: 0.03464823153791422
          policy_loss: 0.07776999904049767
          total_loss: 0.9905372540156047
          vf_explained_var: 0.3771158456802368
          vf_loss: 0.9280517887738016
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_trained: 1199000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1199,33989.3,1199000,-25.538,-18.8,-38.7,255.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-10-22_05-12-38
  done: false
  episode_len_mean: 259.16
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -25.916000000000096
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4131
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.679493251866246e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5305513183275858
          entropy_coeff: 0.009999999999999998
          kl: 0.016525178584604797
          policy_loss: 0.083378104865551
          total_loss: 0.7056904839144813
          vf_explained_var: 0.3767611086368561
          vf_loss: 0.6376178934756253
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_trained: 1200000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1200,34011.2,1200000,-25.916,-18.8,-38.7,259.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-10-22_05-12-59
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -26.301000000000098
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4134
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.679493251866246e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.4934845195876227
          entropy_coeff: 0.009999999999999998
          kl: 0.01642734045255586
          policy_loss: 0.07384805844889747
          total_loss: 0.8504168344868555
          vf_explained_var: 0.3377936780452728
          vf_loss: 0.7915036402642727
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_trained: 1201000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1201,34032.7,1201000,-26.301,-18.8,-38.7,263.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-10-22_05-13-22
  done: false
  episode_len_mean: 266.26
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -26.626000000000108
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4137
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.679493251866246e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.3726534022225274
          entropy_coeff: 0.009999999999999998
          kl: 0.01638698154606486
          policy_loss: 0.05479074385431078
          total_loss: 1.019670293894079
          vf_explained_var: 0.3450099527835846
          vf_loss: 0.9786060938404666
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_trained: 1202000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1202,34055.6,1202000,-26.626,-18.8,-38.7,266.26




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-10-22_05-14-04
  done: false
  episode_len_mean: 268.53
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -26.85300000000011
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4140
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.679493251866246e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.2409412966834175
          entropy_coeff: 0.009999999999999998
          kl: 0.08128615646025503
          policy_loss: -0.09698984622955323
          total_loss: 0.9576452361212836
          vf_explained_var: 0.45811882615089417
          vf_loss: 1.067044488257832
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained: 1203000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1203,34097.4,1203000,-26.853,-18.8,-38.7,268.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-10-22_05-14-30
  done: false
  episode_len_mean: 271.37
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.13700000000011
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4144
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.519239877799371e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.0747320241398282
          entropy_coeff: 0.009999999999999998
          kl: 0.014584285373181653
          policy_loss: 0.04788200077083376
          total_loss: 0.9714773542351193
          vf_explained_var: 0.5726003646850586
          vf_loss: 0.9343426720963584
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trained: 1204000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1204,34122.9,1204000,-27.137,-18.8,-38.7,271.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-10-22_05-14-57
  done: false
  episode_len_mean: 273.54
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.354000000000124
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4147
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.519239877799371e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1267880174848768
          entropy_coeff: 0.009999999999999998
          kl: 0.03573681956159792
          policy_loss: -0.09185712006356982
          total_loss: 0.7893599874443478
          vf_explained_var: 0.6131213307380676
          vf_loss: 0.8924849914179908
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_trained: 120500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1205,34150.4,1205000,-27.354,-18.8,-38.7,273.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-10-22_05-15-24
  done: false
  episode_len_mean: 276.12
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -27.612000000000123
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4151
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2778859816699056e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0536439617474873
          entropy_coeff: 0.009999999999999998
          kl: 0.009866403393507243
          policy_loss: 0.05790182997783025
          total_loss: 0.8480530248747932
          vf_explained_var: 0.5661244988441467
          vf_loss: 0.8006876495149401
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_trained: 12060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1206,34177.4,1206000,-27.612,-19.5,-38.7,276.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-10-22_05-15-51
  done: false
  episode_len_mean: 278.6
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -27.860000000000127
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4155
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2778859816699056e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9786318937937418
          entropy_coeff: 0.009999999999999998
          kl: 0.016128781556017203
          policy_loss: 0.038672070287995866
          total_loss: 0.7402629435062409
          vf_explained_var: 0.5760390162467957
          vf_loss: 0.7113771935304006
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trained: 12070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1207,34203.8,1207000,-27.86,-19.5,-38.7,278.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-10-22_05-16-16
  done: false
  episode_len_mean: 280.79
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -28.079000000000132
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4158
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2778859816699056e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9747886889510684
          entropy_coeff: 0.009999999999999998
          kl: 0.013310569543746098
          policy_loss: 0.041491272341873914
          total_loss: 0.785023319721222
          vf_explained_var: 0.5306181311607361
          vf_loss: 0.7532799362308449
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_trained: 12080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1208,34229.6,1208000,-28.079,-19.5,-38.7,280.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-10-22_05-16-44
  done: false
  episode_len_mean: 282.74
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -28.274000000000136
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4162
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2778859816699056e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9175198859638638
          entropy_coeff: 0.009999999999999998
          kl: 0.05450684582433346
          policy_loss: 0.011949016898870467
          total_loss: 0.8171399195988973
          vf_explained_var: 0.45450901985168457
          vf_loss: 0.8143661055299971
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_trained: 1209

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1209,34256.8,1209000,-28.274,-19.5,-38.7,282.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-10-22_05-17-09
  done: false
  episode_len_mean: 285.16
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -28.51600000000014
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4165
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9168289725048587e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8818826350900862
          entropy_coeff: 0.009999999999999998
          kl: 0.035000031372273255
          policy_loss: -0.04031618154711193
          total_loss: 1.0240237222777473
          vf_explained_var: 0.2728204131126404
          vf_loss: 1.0731587436464098
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_trained: 12100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1210,34282.4,1210000,-28.516,-19.5,-38.7,285.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-10-22_05-17-33
  done: false
  episode_len_mean: 288.09
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -28.809000000000143
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4169
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9720127284526825
          entropy_coeff: 0.009999999999999998
          kl: 0.01812464607574478
          policy_loss: 0.05908427793118689
          total_loss: 1.158171824614207
          vf_explained_var: 0.177288219332695
          vf_loss: 1.1088076856401232
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_trained: 1211000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1211,34306.5,1211000,-28.809,-19.5,-38.7,288.09




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-10-22_05-18-15
  done: false
  episode_len_mean: 290.47
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.047000000000143
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4172
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0432552887333764
          entropy_coeff: 0.009999999999999998
          kl: 0.01978555555797183
          policy_loss: 0.024713922209209867
          total_loss: 0.8761642585198085
          vf_explained_var: -0.0693124532699585
          vf_loss: 0.8618828911748198
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trained: 1212

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1212,34348,1212000,-29.047,-19.5,-38.7,290.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-10-22_05-18-41
  done: false
  episode_len_mean: 293.14
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.314000000000146
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4176
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.915412575006485
          entropy_coeff: 0.009999999999999998
          kl: 0.015050149377185247
          policy_loss: 0.02071975221236547
          total_loss: 1.2110284911261664
          vf_explained_var: 0.15852287411689758
          vf_loss: 1.1994628601604038
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_trained: 12130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1213,34373.7,1213000,-29.314,-19.5,-38.7,293.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-10-22_05-19-08
  done: false
  episode_len_mean: 294.83
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.483000000000157
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4179
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8794498470094468
          entropy_coeff: 0.009999999999999998
          kl: 0.007433197298726076
          policy_loss: -0.07730696830484603
          total_loss: 0.8216488424274656
          vf_explained_var: 0.1657634675502777
          vf_loss: 0.9077503116594421
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_trained: 1214

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1214,34401.5,1214000,-29.483,-19.5,-38.7,294.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-10-22_05-19-34
  done: false
  episode_len_mean: 297.35
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.73500000000015
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4183
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8741531789302825
          entropy_coeff: 0.009999999999999998
          kl: 0.012455599321306939
          policy_loss: 0.019314597960975434
          total_loss: 1.364015453391605
          vf_explained_var: 0.07744113355875015
          vf_loss: 1.3534423828125
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trained: 1215000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1215,34427.3,1215000,-29.735,-22,-38.7,297.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-10-22_05-20-02
  done: false
  episode_len_mean: 298.8
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -29.88000000000016
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4187
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8817094855838352
          entropy_coeff: 0.009999999999999998
          kl: 0.013290639495504821
          policy_loss: 0.04018305747045411
          total_loss: 1.1220279263125525
          vf_explained_var: 0.08912013471126556
          vf_loss: 1.0906619727611542
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_trained: 1216000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1216,34455.1,1216000,-29.88,-23.1,-38.7,298.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-10-22_05-20-30
  done: false
  episode_len_mean: 300.02
  episode_media: {}
  episode_reward_max: -23.800000000000068
  episode_reward_mean: -30.00200000000016
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4191
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8752434587572873e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8821311089727614
          entropy_coeff: 0.009999999999999998
          kl: 0.004051086282306926
          policy_loss: -0.0005939213765992059
          total_loss: 1.3550990912649366
          vf_explained_var: 0.13668625056743622
          vf_loss: 1.3645143138037787
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1217,34482.6,1217000,-30.002,-23.8,-38.7,300.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-10-22_05-20-56
  done: false
  episode_len_mean: 300.83
  episode_media: {}
  episode_reward_max: -24.200000000000074
  episode_reward_mean: -30.083000000000155
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4194
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4376217293786437e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.949601415793101
          entropy_coeff: 0.009999999999999998
          kl: 0.03512261188822079
          policy_loss: 0.024036824040942723
          total_loss: 0.9118733856413099
          vf_explained_var: 0.19597837328910828
          vf_loss: 0.8973325696256426
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_trained: 12180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1218,34509.4,1218000,-30.083,-24.2,-38.7,300.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-10-22_05-21-23
  done: false
  episode_len_mean: 301.88
  episode_media: {}
  episode_reward_max: -25.700000000000095
  episode_reward_mean: -30.188000000000162
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4198
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1564325940679656e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.016341135236952
          entropy_coeff: 0.009999999999999998
          kl: 0.02775356638770137
          policy_loss: 0.0246474199824863
          total_loss: 1.143024300204383
          vf_explained_var: 0.34033912420272827
          vf_loss: 1.1285402966870202
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained: 1219000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1219,34535.6,1219000,-30.188,-25.7,-38.7,301.88




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-10-22_05-22-08
  done: false
  episode_len_mean: 301.07
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -30.107000000000152
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4202
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2346488911019477e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1136895755926768
          entropy_coeff: 0.009999999999999998
          kl: 0.011846996655545203
          policy_loss: 0.030384135163492625
          total_loss: 1.1619752009709676
          vf_explained_var: 0.271714985370636
          vf_loss: 1.1427279707458284
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trained: 12200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1220,34580.4,1220000,-30.107,-23.5,-38.7,301.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-10-22_05-22-34
  done: false
  episode_len_mean: 299.74
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -29.97400000000015
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 3
  episodes_total: 4205
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2346488911019477e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9992982765038808
          entropy_coeff: 0.009999999999999998
          kl: 0.016641433916039006
          policy_loss: 0.02735654620660676
          total_loss: 1.0848399837811789
          vf_explained_var: 0.24109578132629395
          vf_loss: 1.0674764315287273
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trained: 12210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1221,34606.8,1221000,-29.974,-23.5,-38.7,299.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-10-22_05-23-01
  done: false
  episode_len_mean: 296.91
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -29.691000000000145
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4209
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2346488911019477e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8470238553153144
          entropy_coeff: 0.009999999999999998
          kl: 0.03244782337394491
          policy_loss: -0.007023298657602734
          total_loss: 1.0847960498597886
          vf_explained_var: 0.47811269760131836
          vf_loss: 1.1002895911534627
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_trained: 122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1222,34633.8,1222000,-29.691,-23.5,-38.7,296.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-10-22_05-23-29
  done: false
  episode_len_mean: 294.1
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -29.41000000000015
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4213
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.851973336652922e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6750581536028121
          entropy_coeff: 0.009999999999999998
          kl: 0.004432692358349281
          policy_loss: -0.003411004195610682
          total_loss: 1.2137611760033502
          vf_explained_var: 0.3358288109302521
          vf_loss: 1.223922758632236
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_trained: 1223000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1223,34662,1223000,-29.41,-23.5,-38.7,294.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-10-22_05-23-57
  done: false
  episode_len_mean: 289.8
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.980000000000146
  episode_reward_min: -38.70000000000028
  episodes_this_iter: 4
  episodes_total: 4217
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.425986668326461e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.713913842704561
          entropy_coeff: 0.009999999999999998
          kl: 0.07450566667695442
          policy_loss: 0.005273109177748362
          total_loss: 0.9715132031175825
          vf_explained_var: 0.43409591913223267
          vf_loss: 0.9733792311615415
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_trained: 1224000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1224,34690.1,1224000,-28.98,-23.5,-38.7,289.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-10-22_05-24-26
  done: false
  episode_len_mean: 285.44
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.544000000000143
  episode_reward_min: -36.900000000000254
  episodes_this_iter: 4
  episodes_total: 4221
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.638980002489691e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6532540943887498
          entropy_coeff: 0.009999999999999998
          kl: 0.030747898239762407
          policy_loss: 0.0457180251677831
          total_loss: 0.8959749718507131
          vf_explained_var: 0.5359729528427124
          vf_loss: 0.856789501508077
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trained: 1225000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1225,34718.8,1225000,-28.544,-23.5,-36.9,285.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-10-22_05-24-53
  done: false
  episode_len_mean: 282.84
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.284000000000137
  episode_reward_min: -36.900000000000254
  episodes_this_iter: 3
  episodes_total: 4224
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.458470003734536e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.7195168144173092
          entropy_coeff: 0.009999999999999998
          kl: 0.024728840697693814
          policy_loss: -0.08441662556595272
          total_loss: 0.8187583168347676
          vf_explained_var: 0.38362765312194824
          vf_loss: 0.9103701240486569
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_trained: 122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1226,34745.9,1226000,-28.284,-23.5,-36.9,282.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-10-22_05-25-20
  done: false
  episode_len_mean: 280.07
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.007000000000126
  episode_reward_min: -36.900000000000254
  episodes_this_iter: 4
  episodes_total: 4228
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.187705005601807e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6231050021118588
          entropy_coeff: 0.009999999999999998
          kl: 0.004541252524383508
          policy_loss: 0.04242295589711931
          total_loss: 1.0855902175108592
          vf_explained_var: 0.27975893020629883
          vf_loss: 1.0493982970714568
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_trained: 1227

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1227,34772.8,1227000,-28.007,-23.5,-36.9,280.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-10-22_05-26-05
  done: false
  episode_len_mean: 277.12
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.71200000000012
  episode_reward_min: -36.00000000000024
  episodes_this_iter: 4
  episodes_total: 4232
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0938525028009034e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8010006142987145
          entropy_coeff: 0.009999999999999998
          kl: 0.041854234798967445
          policy_loss: 0.03943712330526776
          total_loss: 1.1065463105837503
          vf_explained_var: 0.29387566447257996
          vf_loss: 1.0751192112763723
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_trained: 12280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1228,34817.6,1228000,-27.712,-22.2,-36,277.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-10-22_05-26-33
  done: false
  episode_len_mean: 274.04
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.40400000000012
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4236
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.140778754201356e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5469167871607674
          entropy_coeff: 0.009999999999999998
          kl: 0.004863776673118991
          policy_loss: 0.0292394091685613
          total_loss: 1.0071386218070983
          vf_explained_var: 0.2748866081237793
          vf_loss: 0.9833683709303538
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_trained: 1229000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1229,34845.6,1229000,-27.404,-22.2,-34.6,274.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-10-22_05-27-02
  done: false
  episode_len_mean: 272.16
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.216000000000122
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4240
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.070389377100678e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.4766385591692395
          entropy_coeff: 0.009999999999999998
          kl: 0.009366861142779234
          policy_loss: 0.02233062113324801
          total_loss: 0.9751138395733303
          vf_explained_var: 0.3520832359790802
          vf_loss: 0.9575495971573724
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_trained: 123000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1230,34875,1230000,-27.216,-22.2,-34.6,272.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-10-22_05-27-31
  done: false
  episode_len_mean: 270.59
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.059000000000115
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4244
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.070389377100678e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5896240525775486
          entropy_coeff: 0.009999999999999998
          kl: 0.12719024508726837
          policy_loss: -0.008642237157457405
          total_loss: 0.7261128524939219
          vf_explained_var: 0.5531204342842102
          vf_loss: 0.7406513333320618
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_trained: 12310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1231,34903.9,1231000,-27.059,-22.2,-34.6,270.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-10-22_05-28-01
  done: false
  episode_len_mean: 269.37
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.937000000000115
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4248
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.605584065651016e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.44089194503095414
          entropy_coeff: 0.009999999999999998
          kl: 0.010836118762638533
          policy_loss: 0.03395004409054915
          total_loss: 0.7805389198991988
          vf_explained_var: 0.5143637657165527
          vf_loss: 0.7509977956612904
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_trained: 12320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1232,34933.2,1232000,-26.937,-22.2,-34.6,269.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-10-22_05-28-28
  done: false
  episode_len_mean: 269.36
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.936000000000107
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 3
  episodes_total: 4251
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.605584065651016e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5408097902933756
          entropy_coeff: 0.009999999999999998
          kl: 0.016260640620813404
          policy_loss: -0.007191894368992912
          total_loss: 0.8810144788689084
          vf_explained_var: 0.22227522730827332
          vf_loss: 0.8936144544018639
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trained: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1233,34960.9,1233000,-26.936,-22.2,-34.6,269.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-10-22_05-28-55
  done: false
  episode_len_mean: 269.44
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.94400000000011
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4255
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.605584065651016e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5016283945904838
          entropy_coeff: 0.009999999999999998
          kl: 0.020298827204460284
          policy_loss: 0.056149725036488636
          total_loss: 0.9844062639607324
          vf_explained_var: 0.2754755914211273
          vf_loss: 0.9332728260093265
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 123400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1234,34988,1234000,-26.944,-22.2,-34.6,269.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-10-22_05-29-23
  done: false
  episode_len_mean: 268.31
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.831000000000117
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 4
  episodes_total: 4259
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.908376098476526e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5465670986307992
          entropy_coeff: 0.009999999999999998
          kl: 0.013834777934163848
          policy_loss: 0.01871594012611442
          total_loss: 0.950583400328954
          vf_explained_var: 0.31851619482040405
          vf_loss: 0.9373331387837728
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_trained: 123500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1235,35016,1235000,-26.831,-22.2,-34.6,268.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-10-22_05-30-03
  done: false
  episode_len_mean: 270.39
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.03900000000012
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 3
  episodes_total: 4262
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.908376098476526e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6703436877992418
          entropy_coeff: 0.009999999999999998
          kl: 0.10610493283441684
          policy_loss: 0.10501644867989753
          total_loss: 1.0157723764578501
          vf_explained_var: -0.16519087553024292
          vf_loss: 0.9174593707339631
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained: 123600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1236,35055.5,1236000,-27.039,-22.2,-41.5,270.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-10-22_05-30-31
  done: false
  episode_len_mean: 269.49
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.94900000000011
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4266
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0362564147714789e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.4745347996552785
          entropy_coeff: 0.009999999999999998
          kl: 0.00810881323291852
          policy_loss: 0.008883879913224115
          total_loss: 0.8457130107614729
          vf_explained_var: 0.5355650186538696
          vf_loss: 0.8415744807985094
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained: 123700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1237,35083.8,1237000,-26.949,-22.2,-41.5,269.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-10-22_05-31-00
  done: false
  episode_len_mean: 267.88
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.78800000000011
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4270
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0362564147714789e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.4054310424460305
          entropy_coeff: 0.009999999999999998
          kl: 0.006027425006539507
          policy_loss: -0.014241253584623336
          total_loss: 0.9408665696779887
          vf_explained_var: 0.3724932074546814
          vf_loss: 0.9591621180375417
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_trained: 1238

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1238,35112.7,1238000,-26.788,-22.2,-41.5,267.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-10-22_05-31-29
  done: false
  episode_len_mean: 265.77
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.57700000000011
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4274
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0362564147714789e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.3608223862118191
          entropy_coeff: 0.009999999999999998
          kl: 0.005929854998335
          policy_loss: 0.045273131794399686
          total_loss: 0.8214105023278131
          vf_explained_var: 0.5043591856956482
          vf_loss: 0.7797455886999766
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_trained: 1239000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1239,35141.5,1239000,-26.577,-22.2,-41.5,265.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-10-22_05-31-58
  done: false
  episode_len_mean: 264.68
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.468000000000096
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4278
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0362564147714789e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.38795246481895446
          entropy_coeff: 0.009999999999999998
          kl: 0.002903599010983113
          policy_loss: 0.020754305687215594
          total_loss: 1.013029123014874
          vf_explained_var: 0.3768019676208496
          vf_loss: 0.9961543434196048
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_trained: 1240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1240,35170.5,1240000,-26.468,-22.2,-41.5,264.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-10-22_05-32-27
  done: false
  episode_len_mean: 263.5
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.3500000000001
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4282
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.181282073857394e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5106918152835634
          entropy_coeff: 0.009999999999999998
          kl: 0.056628090501122746
          policy_loss: 0.001959993607468075
          total_loss: 0.9940483073393503
          vf_explained_var: 0.40259402990341187
          vf_loss: 0.9971952266163296
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_trained: 1241000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1241,35199.5,1241000,-26.35,-22.2,-41.5,263.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-10-22_05-32-56
  done: false
  episode_len_mean: 262.92
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.2920000000001
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4286
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.771923110786093e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.3707423243257735
          entropy_coeff: 0.009999999999999998
          kl: 0.012135502653370623
          policy_loss: 7.985002464718288e-05
          total_loss: 1.037972617149353
          vf_explained_var: 0.32170701026916504
          vf_loss: 1.0416001902686225
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained: 124200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1242,35228.3,1242000,-26.292,-22.2,-41.5,262.92




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-10-22_05-33-41
  done: false
  episode_len_mean: 262.22
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.222000000000108
  episode_reward_min: -41.50000000000032
  episodes_this_iter: 4
  episodes_total: 4290
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.771923110786093e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.4750571545627382
          entropy_coeff: 0.009999999999999998
          kl: 0.07469229446751875
          policy_loss: 0.03616192614038785
          total_loss: 0.8911338448524475
          vf_explained_var: 0.3173132836818695
          vf_loss: 0.8597224977281358
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 1243000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1243,35273.2,1243000,-26.222,-22.2,-41.5,262.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-10-22_05-33-53
  done: false
  episode_len_mean: 267.48
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -26.748000000000097
  episode_reward_min: -79.39999999999976
  episodes_this_iter: 1
  episodes_total: 4291
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1657884666179137e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.2648706828554471
          entropy_coeff: 0.009999999999999998
          kl: 0.022830249772073093
          policy_loss: -0.06176465683513217
          total_loss: 0.5352035789026155
          vf_explained_var: -0.42132288217544556
          vf_loss: 0.5996169431342019
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1244,35285.8,1244000,-26.748,-22.2,-79.4,267.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-10-22_05-34-04
  done: false
  episode_len_mean: 272.82
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.2820000000001
  episode_reward_min: -80.19999999999972
  episodes_this_iter: 1
  episodes_total: 4292
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7486826999268698e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.26306056843863596
          entropy_coeff: 0.009999999999999998
          kl: 0.0032017817720952912
          policy_loss: -0.05488302459319432
          total_loss: 0.5588006014625232
          vf_explained_var: -0.43962520360946655
          vf_loss: 0.6163142270925972
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1245,35296.8,1245000,-27.282,-22.2,-80.2,272.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-10-22_05-34-15
  done: false
  episode_len_mean: 278.01
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.80100000000009
  episode_reward_min: -80.19999999999972
  episodes_this_iter: 1
  episodes_total: 4293
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.743413499634349e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.2606802678770489
          entropy_coeff: 0.009999999999999998
          kl: 0.004083144609270938
          policy_loss: -0.04812799311346478
          total_loss: 0.6017375888095962
          vf_explained_var: -0.43203112483024597
          vf_loss: 0.6524723777340518
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_trained: 1246

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1246,35307.4,1246000,-27.801,-22.2,-80.2,278.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-10-22_05-34-26
  done: false
  episode_len_mean: 289.13
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -28.913000000000082
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 2
  episodes_total: 4295
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.3142393593986829
          entropy_coeff: 0.009999999999999998
          kl: 0.00613798516195365
          policy_loss: 0.12933675481213464
          total_loss: 0.6518725848860211
          vf_explained_var: -0.4342096447944641
          vf_loss: 0.5256782269829677
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained: 12470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1247,35318.6,1247000,-28.913,-22.2,-88.1,289.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-10-22_05-34-37
  done: false
  episode_len_mean: 294.34
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.434000000000083
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4296
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.3271012402243084
          entropy_coeff: 0.009999999999999998
          kl: 0.005318699990534192
          policy_loss: 0.04403426299492518
          total_loss: 0.3126141640875075
          vf_explained_var: -0.6602272987365723
          vf_loss: 0.27185091211770973
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1248,35329.4,1248000,-29.434,-22.2,-88.1,294.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-10-22_05-34-48
  done: false
  episode_len_mean: 299.99
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.999000000000084
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4297
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.32019981344540915
          entropy_coeff: 0.009999999999999998
          kl: 0.007154932912326157
          policy_loss: -0.03270731460716989
          total_loss: 0.6033732366230753
          vf_explained_var: -0.6473661661148071
          vf_loss: 0.6392825491053776
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1249,35340.8,1249000,-29.999,-22.2,-88.1,299.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-10-22_05-34-59
  done: false
  episode_len_mean: 305.31
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -30.531000000000073
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4298
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.38387417627705467
          entropy_coeff: 0.009999999999999998
          kl: 0.012549916782563425
          policy_loss: -0.04178159816397561
          total_loss: 0.7131682753562927
          vf_explained_var: -0.6101434826850891
          vf_loss: 0.7587886151547233
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1250,35351.2,1250000,-30.531,-22.2,-88.1,305.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-10-22_05-35-10
  done: false
  episode_len_mean: 310.68
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -31.06800000000007
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4299
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.34967376755343543
          entropy_coeff: 0.009999999999999998
          kl: 0.009291776755458406
          policy_loss: -0.03962683594889111
          total_loss: 0.7483679185311
          vf_explained_var: -0.656919002532959
          vf_loss: 0.7914914743767845
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained: 1251000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1251,35362.6,1251000,-31.068,-22.2,-88.1,310.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-10-22_05-35-21
  done: false
  episode_len_mean: 322.23
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.22300000000005
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 2
  episodes_total: 4301
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3717067498171745e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.3087520269884004
          entropy_coeff: 0.009999999999999998
          kl: 0.002509253056372687
          policy_loss: 0.10086680303017298
          total_loss: 0.8974698043531841
          vf_explained_var: -0.6553210616111755
          vf_loss: 0.7996905336156488
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 12520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1252,35372.9,1252000,-32.223,-22.2,-88.1,322.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-10-22_05-35-32
  done: false
  episode_len_mean: 327.39
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.73900000000005
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4302
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1858533749085872e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.4153410332070457
          entropy_coeff: 0.009999999999999998
          kl: 0.010608753771898231
          policy_loss: -0.048152658177746666
          total_loss: 0.767850261926651
          vf_explained_var: -0.5363832712173462
          vf_loss: 0.8201563226059079
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained: 1253

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1253,35384.6,1253000,-32.739,-22.2,-88.1,327.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-10-22_05-35-43
  done: false
  episode_len_mean: 332.46
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.246000000000045
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4303
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1858533749085872e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.3081850348247422
          entropy_coeff: 0.009999999999999998
          kl: 0.012489500090394599
          policy_loss: -0.059657913280857934
          total_loss: 0.7585693905750911
          vf_explained_var: -0.6170935034751892
          vf_loss: 0.8213091512107187
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1254,35395.1,1254000,-33.246,-22.2,-88.1,332.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-10-22_05-35-53
  done: false
  episode_len_mean: 338.19
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.819000000000045
  episode_reward_min: -88.09999999999927
  episodes_this_iter: 1
  episodes_total: 4304
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1858533749085872e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.0988430305901501
          entropy_coeff: 0.009999999999999998
          kl: 0.013027786425781389
          policy_loss: -0.05648351179228889
          total_loss: 0.7664256647229195
          vf_explained_var: -0.6336173415184021
          vf_loss: 0.8238976223394274
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1255,35405.3,1255000,-33.819,-22.2,-88.1,338.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-10-22_05-36-03
  done: false
  episode_len_mean: 344.86
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.48600000000002
  episode_reward_min: -93.89999999999894
  episodes_this_iter: 1
  episodes_total: 4305
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1858533749085872e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.07420635608335337
          entropy_coeff: 0.009999999999999998
          kl: 0.0012680326560431214
          policy_loss: -0.055082959681749345
          total_loss: 0.7678590524527762
          vf_explained_var: -0.36337971687316895
          vf_loss: 0.8236840818491247
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1256,35414.7,1256000,-34.486,-22.2,-93.9,344.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-10-22_05-36-13
  done: false
  episode_len_mean: 351.81
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.18100000000001
  episode_reward_min: -95.99999999999882
  episodes_this_iter: 1
  episodes_total: 4306
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0929266874542936e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.05885890242126253
          entropy_coeff: 0.009999999999999998
          kl: 0.0032479451281284513
          policy_loss: -0.04379358838001887
          total_loss: 0.7836686710516612
          vf_explained_var: -0.3481364846229553
          vf_loss: 0.8280508429018988
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1257,35424.7,1257000,-35.181,-22.2,-96,351.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-10-22_05-36-22
  done: false
  episode_len_mean: 358.81
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.881
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4307
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.464633437271468e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.111603677769502
          entropy_coeff: 0.009999999999999998
          kl: 0.0003494465708307754
          policy_loss: -0.051299992534849376
          total_loss: 0.7828214461604754
          vf_explained_var: -0.6581571102142334
          vf_loss: 0.8352374823143085
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained: 1258000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1258,35434.3,1258000,-35.881,-22.2,-96.1,358.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-10-22_05-36-33
  done: false
  episode_len_mean: 365.4
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -36.539999999999985
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4308
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.732316718635734e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.10978552657696936
          entropy_coeff: 0.009999999999999998
          kl: 0.0011646447865598405
          policy_loss: -0.050849426289399466
          total_loss: 0.7838810920715332
          vf_explained_var: -0.6575412750244141
          vf_loss: 0.8358283704353704
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1259,35444.6,1259000,-36.54,-22.2,-96.1,365.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-10-22_05-36-43
  done: false
  episode_len_mean: 377.9
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -37.78999999999997
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 2
  episodes_total: 4310
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.366158359317867e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.11648537243405978
          entropy_coeff: 0.009999999999999998
          kl: 0.001470920466769855
          policy_loss: 0.11274037808179856
          total_loss: 0.8988610116971864
          vf_explained_var: -0.5083798766136169
          vf_loss: 0.7872854762607151
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained: 126000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1260,35454.6,1260000,-37.79,-22.2,-96.1,377.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-10-22_05-36-53
  done: false
  episode_len_mean: 384.53
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -38.45299999999996
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4311
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.830791796589335e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.10130840457148022
          entropy_coeff: 0.009999999999999998
          kl: 0.0010130326741108636
          policy_loss: 0.16305951575438182
          total_loss: 0.1675842132833269
          vf_explained_var: -0.7599833011627197
          vf_loss: 0.005537782148975465
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1261,35465.2,1261000,-38.453,-22.2,-96.1,384.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-10-22_05-37-03
  done: false
  episode_len_mean: 390.74
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -39.07399999999995
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4312
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4153958982946676e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.09336062429679765
          entropy_coeff: 0.009999999999999998
          kl: 0.002280394975505552
          policy_loss: -0.05374067972103755
          total_loss: 0.8217082579930624
          vf_explained_var: -0.6605218052864075
          vf_loss: 0.8763825214364462
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1262,35475,1262000,-39.074,-22.2,-96.1,390.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-10-22_05-37-13
  done: false
  episode_len_mean: 397.61
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -39.76099999999994
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4313
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7076979491473338e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.06072509818606907
          entropy_coeff: 0.009999999999999998
          kl: 0.0061125169992130995
          policy_loss: -0.05261427296532525
          total_loss: 0.8229439147644573
          vf_explained_var: -0.6593683362007141
          vf_loss: 0.8761654425826337
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1263,35485.1,1263000,-39.761,-22.2,-96.1,397.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-10-22_05-37-23
  done: false
  episode_len_mean: 404.49
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -40.44899999999993
  episode_reward_min: -96.09999999999881
  episodes_this_iter: 1
  episodes_total: 4314
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7076979491473338e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.04207486764838298
          entropy_coeff: 0.009999999999999998
          kl: 0.0018886106271010148
          policy_loss: -0.05349861780802409
          total_loss: 0.8216785503758325
          vf_explained_var: -0.6347447037696838
          vf_loss: 0.8755979062161512
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1264,35494.5,1264000,-40.449,-22.2,-96.1,404.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-10-22_05-37-32
  done: false
  episode_len_mean: 411.66
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -41.16599999999991
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 1
  episodes_total: 4315
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.538489745736669e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.015913849458512333
          entropy_coeff: 0.009999999999999998
          kl: 0.0023234308084888688
          policy_loss: -0.057453553378582004
          total_loss: 0.8246445515089564
          vf_explained_var: -0.6167545914649963
          vf_loss: 0.8822572326494588
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1265,35504.3,1265000,-41.166,-22.2,-97.6,411.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-10-22_05-37-41
  done: false
  episode_len_mean: 418.95
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -41.89499999999989
  episode_reward_min: -98.79999999999866
  episodes_this_iter: 1
  episodes_total: 4316
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.2692448728683344e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.017851485260244875
          entropy_coeff: 0.009999999999999998
          kl: 5.703067056065257e-05
          policy_loss: -0.05473073687818315
          total_loss: 0.8179978057742119
          vf_explained_var: -0.43345409631729126
          vf_loss: 0.872907048670782
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1266,35513.4,1266000,-41.895,-22.2,-98.8,418.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-10-22_05-37-51
  done: false
  episode_len_mean: 426.37
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -42.63699999999988
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4317
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1346224364341672e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.057208588822848266
          entropy_coeff: 0.009999999999999998
          kl: 0.0035732825062014363
          policy_loss: -0.05413329054911931
          total_loss: 0.8033146608206961
          vf_explained_var: -0.5148075222969055
          vf_loss: 0.8580200356741746
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1267,35523.2,1267000,-42.637,-22.2,-99.6,426.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-10-22_05-38-00
  done: false
  episode_len_mean: 433.73
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -43.37299999999988
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4318
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0673112182170836e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.04894244484603405
          entropy_coeff: 0.009999999999999998
          kl: 0.0024299638610794636
          policy_loss: -0.05394956916570663
          total_loss: 0.8111135640078121
          vf_explained_var: -0.392733633518219
          vf_loss: 0.8655525826331641
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1268,35532.3,1268000,-43.373,-22.2,-99.6,433.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-10-22_05-38-10
  done: false
  episode_len_mean: 441.0
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -44.099999999999866
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4319
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.336556091085418e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.04244141150265932
          entropy_coeff: 0.009999999999999998
          kl: 0.0065012110858059515
          policy_loss: -0.04969911840226915
          total_loss: 0.7982602980401781
          vf_explained_var: -0.28877416253089905
          vf_loss: 0.8483838471273581
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1269,35542.1,1269000,-44.1,-22.2,-99.6,441




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-10-22_05-38-36
  done: false
  episode_len_mean: 446.72
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -44.67199999999986
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4320
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.336556091085418e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.09302545388539632
          entropy_coeff: 0.009999999999999998
          kl: 0.007397225841631538
          policy_loss: -0.053329875734117294
          total_loss: 0.7917084818085035
          vf_explained_var: -0.6241967678070068
          vf_loss: 0.8459686139184568
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1270,35568.4,1270000,-44.672,-22.2,-99.6,446.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-10-22_05-38-48
  done: false
  episode_len_mean: 453.8
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -45.37999999999985
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4321
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.336556091085418e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.07206915302409066
          entropy_coeff: 0.009999999999999998
          kl: 0.010547822959744638
          policy_loss: -0.054412874744998084
          total_loss: 0.7996300642689069
          vf_explained_var: -0.5687053203582764
          vf_loss: 0.8547636115716564
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trained: 1271

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1271,35580.3,1271000,-45.38,-22.2,-99.6,453.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-10-22_05-38-58
  done: false
  episode_len_mean: 460.83
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -46.082999999999835
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4322
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.336556091085418e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.048337016999721524
          entropy_coeff: 0.009999999999999998
          kl: 0.003837843912341664
          policy_loss: -0.05229342877864838
          total_loss: 0.8020517870783805
          vf_explained_var: -0.34670016169548035
          vf_loss: 0.8548285769505634
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1272,35589.6,1272000,-46.083,-22.2,-99.6,460.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-10-22_05-39-08
  done: false
  episode_len_mean: 467.7
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -46.76999999999982
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4323
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.668278045542709e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.030209885103007157
          entropy_coeff: 0.009999999999999998
          kl: 0.002037592907044288
          policy_loss: -0.05298909528387918
          total_loss: 0.7928498336010509
          vf_explained_var: -0.2075110375881195
          vf_loss: 0.8461410330401526
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps_trained: 1273

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1273,35599.7,1273000,-46.77,-22.2,-99.6,467.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-10-22_05-39-17
  done: false
  episode_len_mean: 474.93
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -47.4929999999998
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4324
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3341390227713545e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.05812843230863412
          entropy_coeff: 0.009999999999999998
          kl: 0.006352627225487476
          policy_loss: -0.053525424169169535
          total_loss: 0.7996141951945093
          vf_explained_var: -0.6607736945152283
          vf_loss: 0.8537209028378129
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1274,35608.9,1274000,-47.493,-22.2,-99.6,474.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-10-22_05-39-27
  done: false
  episode_len_mean: 482.09
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -48.20899999999979
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4325
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3341390227713545e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.028301106227768793
          entropy_coeff: 0.009999999999999998
          kl: 0.0071884555762752695
          policy_loss: -0.05098630636930466
          total_loss: 0.7854494012064404
          vf_explained_var: -0.4968973398208618
          vf_loss: 0.8367187119399507
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1275,35618.7,1275000,-48.209,-22.2,-99.6,482.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-10-22_05-39-36
  done: false
  episode_len_mean: 489.46
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -48.94599999999977
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4326
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3341390227713545e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.01609562420182758
          entropy_coeff: 0.009999999999999998
          kl: 0.0019069299631529516
          policy_loss: -0.042205682065751815
          total_loss: 0.7825378429558542
          vf_explained_var: -0.6442004442214966
          vf_loss: 0.824904470373359
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1276,35627.9,1276000,-48.946,-22.2,-99.6,489.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-10-22_05-39-46
  done: false
  episode_len_mean: 496.56
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -49.65599999999976
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4327
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6706951138567726e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.014649540237668488
          entropy_coeff: 0.009999999999999998
          kl: 0.0004053834031831152
          policy_loss: -0.039613642791906996
          total_loss: 0.7525411441922187
          vf_explained_var: -0.6589361429214478
          vf_loss: 0.79230127144191
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1277,35637.7,1277000,-49.656,-22.2,-99.6,496.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-10-22_05-39-55
  done: false
  episode_len_mean: 503.68
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -50.36799999999973
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4328
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3353475569283863e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.006805529098750817
          entropy_coeff: 0.009999999999999998
          kl: 0.0007757934227131972
          policy_loss: -0.024659818245304957
          total_loss: 0.591994604219993
          vf_explained_var: -0.648937463760376
          vf_loss: 0.6167224720482611
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1278,35646.9,1278000,-50.368,-22.2,-99.6,503.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-10-22_05-40-05
  done: false
  episode_len_mean: 511.06
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -51.105999999999725
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 1
  episodes_total: 4329
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6676737784641931e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.008909542348960207
          entropy_coeff: 0.009999999999999998
          kl: 0.0005201862921494043
          policy_loss: -0.01686250641942024
          total_loss: 0.45593660573164624
          vf_explained_var: -0.6373338103294373
          vf_loss: 0.47288820344126886
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1279,35656.6,1279000,-51.106,-22.2,-99.6,511.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-10-22_05-40-14
  done: false
  episode_len_mean: 518.81
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -51.880999999999716
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 1
  episodes_total: 4330
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.338368892320966e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.007927354482106037
          entropy_coeff: 0.009999999999999998
          kl: 7.413029260483515e-05
          policy_loss: -0.011193973902199004
          total_loss: 0.2724497879544894
          vf_explained_var: -0.6606644988059998
          vf_loss: 0.2837230289768842
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1280,35665.8,1280000,-51.881,-23,-99.7,518.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-10-22_05-40-24
  done: false
  episode_len_mean: 525.66
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -52.565999999999704
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 1
  episodes_total: 4331
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.169184446160483e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.022381183924153447
          entropy_coeff: 0.009999999999999998
          kl: 0.0014159285474243167
          policy_loss: -0.05344871597157584
          total_loss: 0.8567812997433875
          vf_explained_var: -0.554736852645874
          vf_loss: 0.9104538370544711
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1281,35675.6,1281000,-52.566,-23,-99.7,525.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-10-22_05-40-33
  done: false
  episode_len_mean: 532.74
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -53.27399999999969
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 1
  episodes_total: 4332
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0845922230802414e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.008077836109118329
          entropy_coeff: 0.009999999999999998
          kl: 7.169425150244102e-05
          policy_loss: -0.05394947661293877
          total_loss: 0.85138258718782
          vf_explained_var: -0.6597061157226562
          vf_loss: 0.9054128351931771
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trained: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1282,35684.9,1282000,-53.274,-23,-99.7,532.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-10-22_05-40-43
  done: false
  episode_len_mean: 540.19
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -54.01899999999967
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 1
  episodes_total: 4333
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0422961115401207e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.008247552015301253
          entropy_coeff: 0.009999999999999998
          kl: 5.9089487549971827e-05
          policy_loss: -0.05381611386934916
          total_loss: 0.8351774611406856
          vf_explained_var: -0.6576521396636963
          vf_loss: 0.8890760625402133
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1283,35694.3,1283000,-54.019,-23,-99.7,540.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-10-22_05-40-52
  done: false
  episode_len_mean: 547.65
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -54.76499999999965
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4334
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2114805577006036e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.006014703767788079
          entropy_coeff: 0.009999999999999998
          kl: 0.00025114378910693345
          policy_loss: -0.0530240492688285
          total_loss: 0.8323285920752419
          vf_explained_var: -0.6612409353256226
          vf_loss: 0.8854127713375621
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1284,35703.9,1284000,-54.765,-23,-100,547.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-10-22_05-41-02
  done: false
  episode_len_mean: 555.02
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -55.501999999999626
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4335
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6057402788503018e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.018663798515788383
          entropy_coeff: 0.009999999999999998
          kl: 0.0012172213306765262
          policy_loss: -0.05242625011338128
          total_loss: 0.824910941057735
          vf_explained_var: -0.6564106345176697
          vf_loss: 0.877523837176462
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1285,35713.1,1285000,-55.502,-23,-100,555.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-10-22_05-41-11
  done: false
  episode_len_mean: 562.48
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -56.247999999999614
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4336
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3028701394251509e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.010900732005635898
          entropy_coeff: 0.009999999999999998
          kl: 9.778815000621598e-05
          policy_loss: -0.050431870089636906
          total_loss: 0.8110103110472361
          vf_explained_var: -0.6563742160797119
          vf_loss: 0.8615511749767595
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1286,35722.8,1286000,-56.248,-23,-100,562.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-10-22_05-41-21
  done: false
  episode_len_mean: 569.92
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -56.99199999999959
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4337
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.5143506971257545e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.009093238775514894
          entropy_coeff: 0.009999999999999998
          kl: 6.411036197285903e-05
          policy_loss: -0.050404199295573764
          total_loss: 0.8178051557805803
          vf_explained_var: -0.656853973865509
          vf_loss: 0.8683002916061215
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1287,35732,1287000,-56.992,-23,-100,569.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-10-22_05-41-30
  done: false
  episode_len_mean: 577.4
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -57.73999999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4338
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2571753485628772e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.01000228553182549
          entropy_coeff: 0.009999999999999998
          kl: 2.5143140198865175e-05
          policy_loss: -0.05132511307795842
          total_loss: 0.8163438944352998
          vf_explained_var: -0.6573114991188049
          vf_loss: 0.8677690460450119
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_trained: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1288,35741.6,1288000,-57.74,-23,-100,577.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-10-22_05-41-39
  done: false
  episode_len_mean: 584.88
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -58.487999999999566
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4339
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6285876742814386e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.023980464993251696
          entropy_coeff: 0.009999999999999998
          kl: 0.0015129141402623534
          policy_loss: -0.05253590676519606
          total_loss: 0.8112418272429043
          vf_explained_var: -0.6544595956802368
          vf_loss: 0.8640175472531054
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1289,35750.7,1289000,-58.488,-23,-100,584.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-10-22_05-41-49
  done: false
  episode_len_mean: 592.28
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -59.22799999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4340
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.142938371407193e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.015029982270465956
          entropy_coeff: 0.009999999999999998
          kl: 0.0007088084568447142
          policy_loss: -0.05647467805279626
          total_loss: 0.8068710477815734
          vf_explained_var: -0.6371814012527466
          vf_loss: 0.8634960170214375
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1290,35760.4,1290000,-59.228,-23,-100,592.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-10-22_05-41-58
  done: false
  episode_len_mean: 599.74
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -59.97399999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4341
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0714691857035965e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.011074134438402123
          entropy_coeff: 0.009999999999999998
          kl: 0.00010614455677492677
          policy_loss: -0.05017555571264691
          total_loss: 0.8264374592238002
          vf_explained_var: -0.6572207808494568
          vf_loss: 0.8767237554200822
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1291,35769.5,1291000,-59.974,-23,-100,599.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-10-22_05-42-08
  done: false
  episode_len_mean: 607.28
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -60.72799999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4342
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0357345928517983e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.029257401575644812
          entropy_coeff: 0.009999999999999998
          kl: 0.007702565467900152
          policy_loss: -0.050004627803961435
          total_loss: 0.8275338161322806
          vf_explained_var: -0.6414975523948669
          vf_loss: 0.8778310254009234
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1292,35779.4,1292000,-60.728,-23,-100,607.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-10-22_05-42-17
  done: false
  episode_len_mean: 614.78
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -61.47799999999952
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4343
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0357345928517983e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.028408725808064143
          entropy_coeff: 0.009999999999999998
          kl: 0.0023872539430290847
          policy_loss: -0.05145254598723518
          total_loss: 0.8194546201162868
          vf_explained_var: -0.6586474180221558
          vf_loss: 0.8711912350108226
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1293,35788.4,1293000,-61.478,-23,-100,614.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-10-22_05-42-27
  done: false
  episode_len_mean: 622.26
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -62.225999999999495
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4344
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0178672964258991e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.07131040895150767
          entropy_coeff: 0.009999999999999998
          kl: 0.03809483530423247
          policy_loss: -0.05103734334309896
          total_loss: 0.81991182681587
          vf_explained_var: -0.6486321091651917
          vf_loss: 0.8716622754310568
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_trained: 129400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1294,35798.2,1294000,-62.226,-23,-100,622.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-10-22_05-42-36
  done: false
  episode_len_mean: 629.66
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -62.96599999999948
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4345
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5268009446388492e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.16725985763801468
          entropy_coeff: 0.009999999999999998
          kl: 0.006209861248641128
          policy_loss: -0.05426354044013553
          total_loss: 0.8218822260697682
          vf_explained_var: -0.5879229307174683
          vf_loss: 0.8778183621664842
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 1295

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1295,35807.6,1295000,-62.966,-23,-100,629.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-10-22_05-42-47
  done: false
  episode_len_mean: 636.62
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -63.66199999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4346
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5268009446388492e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.20215752803617054
          entropy_coeff: 0.009999999999999998
          kl: 0.0038803517369365814
          policy_loss: -0.053771189020739664
          total_loss: 0.8175181935230891
          vf_explained_var: -0.6477288603782654
          vf_loss: 0.8733109493429462
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1296,35818.2,1296000,-63.662,-23,-100,636.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-10-22_05-42-57
  done: false
  episode_len_mean: 650.31
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -65.03099999999945
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4348
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.634004723194246e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.3920067760679457
          entropy_coeff: 0.009999999999999998
          kl: 0.11583077218769959
          policy_loss: 0.10037925276491377
          total_loss: 0.8648736314641104
          vf_explained_var: -0.5834687352180481
          vf_loss: 0.768414443017294
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained: 1297000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1297,35827.9,1297000,-65.031,-23,-100,650.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-10-22_05-43-12
  done: false
  episode_len_mean: 655.13
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -65.51299999999945
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4349
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.145100708479137e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.6509509871403376
          entropy_coeff: 0.009999999999999998
          kl: 0.08374233675973086
          policy_loss: -0.051274439858065714
          total_loss: 0.7631599265668128
          vf_explained_var: -0.27465716004371643
          vf_loss: 0.8209438830200169
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trained: 12980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1298,35842.8,1298000,-65.513,-23,-100,655.13




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-10-22_05-43-46
  done: false
  episode_len_mean: 658.72
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -65.87199999999945
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4351
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7176510627187054e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.4956960633397102
          entropy_coeff: 0.009999999999999998
          kl: 0.07303525167230415
          policy_loss: -0.08867857588662041
          total_loss: 1.4104708929856618
          vf_explained_var: -0.07053399831056595
          vf_loss: 1.504106424583329
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 129900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1299,35877.4,1299000,-65.872,-23,-100,658.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-10-22_05-44-04
  done: false
  episode_len_mean: 663.22
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -66.32199999999946
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4353
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5764765940780584e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.5542346715927124
          entropy_coeff: 0.009999999999999998
          kl: 0.04388368007299582
          policy_loss: -0.0794192640317811
          total_loss: 1.2793326314952638
          vf_explained_var: 0.17600928246974945
          vf_loss: 1.3642942225767507
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained: 1300000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1300,35895.5,1300000,-66.322,-23,-100,663.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-10-22_05-44-22
  done: false
  episode_len_mean: 668.59
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -66.85899999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4355
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8647148911170885e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.3808390867378977
          entropy_coeff: 0.009999999999999998
          kl: 0.022302125780413302
          policy_loss: -0.042249143454763625
          total_loss: 1.2979092246956296
          vf_explained_var: -0.2085205316543579
          vf_loss: 1.3439667681852976
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_trained: 1301

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1301,35913.1,1301000,-66.859,-23,-100,668.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-10-22_05-44-34
  done: false
  episode_len_mean: 674.91
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -67.49099999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4357
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.285083536307017
          entropy_coeff: 0.009999999999999998
          kl: 0.007451099532634211
          policy_loss: 0.1122296628024843
          total_loss: 0.8202632145749198
          vf_explained_var: -0.4340302348136902
          vf_loss: 0.7108843869219224
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained: 1302000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1302,35924.8,1302000,-67.491,-23,-100,674.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-10-22_05-44-44
  done: false
  episode_len_mean: 680.56
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -68.05599999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4358
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.29621108306778804
          entropy_coeff: 0.009999999999999998
          kl: 0.005205399309214191
          policy_loss: -0.057251717978053625
          total_loss: 0.6742448765370581
          vf_explained_var: -0.511536180973053
          vf_loss: 0.7344587040444215
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained: 1303

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1303,35935.2,1303000,-68.056,-23,-100,680.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-10-22_05-44-55
  done: false
  episode_len_mean: 686.96
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -68.69599999999946
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4359
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.2594548657536507
          entropy_coeff: 0.009999999999999998
          kl: 0.008844942261881646
          policy_loss: -0.0058531412647830116
          total_loss: 0.08933617133233282
          vf_explained_var: -0.4012450873851776
          vf_loss: 0.09778385823075142
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1304,35945.8,1304000,-68.696,-23,-100,686.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-10-22_05-45-06
  done: false
  episode_len_mean: 692.87
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -69.28699999999944
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4360
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.2481297082371182
          entropy_coeff: 0.009999999999999998
          kl: 0.005648186046732917
          policy_loss: -0.040456928809483844
          total_loss: 0.7045829473270311
          vf_explained_var: -0.6272451877593994
          vf_loss: 0.7475211792315046
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained: 1305

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1305,35956.9,1305000,-69.287,-24.4,-100,692.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-10-22_05-45-17
  done: false
  episode_len_mean: 697.87
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -69.78699999999944
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4361
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.21957205368412866
          entropy_coeff: 0.009999999999999998
          kl: 0.015455732697918181
          policy_loss: -0.059593363685740364
          total_loss: 0.7357458929220836
          vf_explained_var: -0.3493926525115967
          vf_loss: 0.7975349778930346
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1306,35967.9,1306000,-69.787,-24.4,-100,697.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-10-22_05-45-27
  done: false
  episode_len_mean: 707.69
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -70.76899999999944
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4363
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7970723366756325e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.19783481988641952
          entropy_coeff: 0.009999999999999998
          kl: 0.004279929070743628
          policy_loss: 0.10065984692838457
          total_loss: 0.9085129148430294
          vf_explained_var: -0.6530115604400635
          vf_loss: 0.8098314003811942
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_trained: 13070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1307,35978.4,1307000,-70.769,-24.4,-100,707.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-10-22_05-45-38
  done: false
  episode_len_mean: 713.84
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -71.38399999999942
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4364
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8985361683378162e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.6726280490557353
          entropy_coeff: 0.009999999999999998
          kl: 0.1057801787669126
          policy_loss: 0.15152681552701527
          total_loss: 0.15179395973682402
          vf_explained_var: -0.05444181710481644
          vf_loss: 0.00699342488983853
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 13080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1308,35988.9,1308000,-71.384,-24.4,-100,713.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-10-22_05-45-49
  done: false
  episode_len_mean: 718.59
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -71.85899999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4365
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.347804252506724e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.7195805466837353
          entropy_coeff: 0.009999999999999998
          kl: 0.07055841591796339
          policy_loss: -0.03986916144688924
          total_loss: 0.8010785068074863
          vf_explained_var: -0.42839303612709045
          vf_loss: 0.8481434958883458
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 130900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1309,36000.3,1309000,-71.859,-24.4,-100,718.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-10-22_05-46-00
  done: false
  episode_len_mean: 724.15
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -72.41499999999942
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4366
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.521706378760088e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.6864556537734138
          entropy_coeff: 0.009999999999999998
          kl: 0.027102276088821972
          policy_loss: -0.05965411331918505
          total_loss: 0.8045773673388693
          vf_explained_var: -0.6590716242790222
          vf_loss: 0.8710960425850418
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained: 131000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1310,36010.9,1310000,-72.415,-24.4,-100,724.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-10-22_05-46-11
  done: false
  episode_len_mean: 735.28
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -73.52799999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4368
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.782559568140126e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.6375273313787249
          entropy_coeff: 0.009999999999999998
          kl: 0.01607359144214142
          policy_loss: 0.08629970335298115
          total_loss: 0.9446600480212106
          vf_explained_var: -0.6353816390037537
          vf_loss: 0.8647356133080191
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained: 1311000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1311,36022.1,1311000,-73.528,-24.4,-100,735.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-10-22_05-46-22
  done: false
  episode_len_mean: 741.32
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -74.1319999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4369
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.782559568140126e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.914149033361011
          entropy_coeff: 0.009999999999999998
          kl: 0.384488639654703
          policy_loss: 0.15604974114232592
          total_loss: 0.15023172895113626
          vf_explained_var: -0.3783648908138275
          vf_loss: 0.003323478903621435
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained: 1312000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1312,36032.6,1312000,-74.132,-24.4,-100,741.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-10-22_05-46-46
  done: false
  episode_len_mean: 744.24
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -74.42399999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4372
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4673839352210188e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.8260581182108985
          entropy_coeff: 0.009999999999999998
          kl: 0.04530356924428634
          policy_loss: 0.037254985670248666
          total_loss: 1.6911708725823296
          vf_explained_var: -0.2556610703468323
          vf_loss: 1.662176478985283
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained: 1313000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1313,36056.9,1313000,-74.424,-24.4,-100,744.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-10-22_05-47-09
  done: false
  episode_len_mean: 746.77
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -74.67699999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4375
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2010759028315283e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.8598469005690681
          entropy_coeff: 0.009999999999999998
          kl: 0.032571897315660674
          policy_loss: 0.060097164495123756
          total_loss: 1.355060887005594
          vf_explained_var: -0.16559430956840515
          vf_loss: 1.3035621802839967
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained: 13140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1314,36080.1,1314000,-74.677,-24.4,-100,746.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-10-22_05-47-35
  done: false
  episode_len_mean: 748.09
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -74.8089999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4378
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3016138542472928e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.8393780681822035
          entropy_coeff: 0.009999999999999998
          kl: 0.0495489152287285
          policy_loss: -0.11105162484778298
          total_loss: 2.0279667417208356
          vf_explained_var: 0.11038100719451904
          vf_loss: 2.147412159707811
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained: 1315000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1315,36106,1315000,-74.809,-24.4,-100,748.09




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-10-22_05-48-25
  done: false
  episode_len_mean: 747.85
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -74.7849999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 5
  episodes_total: 4383
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.952420781370941e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.9751431418789758
          entropy_coeff: 0.009999999999999998
          kl: 0.48546044193052823
          policy_loss: -0.0010698366496298048
          total_loss: 2.2529075198703343
          vf_explained_var: 0.21687719225883484
          vf_loss: 2.263728760348426
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_trained: 1316000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1316,36155.6,1316000,-74.785,-21.7,-100,747.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-10-22_05-49-02
  done: false
  episode_len_mean: 746.97
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -74.6969999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4387
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.42863117205641e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.2736353874206543
          entropy_coeff: 0.009999999999999998
          kl: 0.04747506190324494
          policy_loss: 0.02317739716834492
          total_loss: 2.3492251488897535
          vf_explained_var: 0.10479863733053207
          vf_loss: 2.3387841012742783
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trained: 1317000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1317,36193,1317000,-74.697,-21.7,-100,746.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-10-22_05-49-35
  done: false
  episode_len_mean: 741.51
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -74.15099999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4391
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1142946758084616e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.0436338239245944
          entropy_coeff: 0.009999999999999998
          kl: 1.6305854217177622
          policy_loss: -0.009559088779820337
          total_loss: 1.6922601580619812
          vf_explained_var: 0.31935423612594604
          vf_loss: 1.712255597114563
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_trained: 1318000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1318,36226,1318000,-74.151,-21.7,-100,741.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-10-22_05-50-12
  done: false
  episode_len_mean: 718.94
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.89399999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4395
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.671442013712692e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.0406590428617266
          entropy_coeff: 0.009999999999999998
          kl: 0.13076266986019663
          policy_loss: -0.008388148496548335
          total_loss: 1.6000270101759169
          vf_explained_var: 0.12260548025369644
          vf_loss: 1.6188217401504517
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trained: 131900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1319,36262.5,1319000,-71.894,-21.5,-100,718.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-10-22_05-50-24
  done: false
  episode_len_mean: 715.77
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.57699999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4396
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.507163020569038e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.11030448168619639
          entropy_coeff: 0.009999999999999998
          kl: 0.05247967258108217
          policy_loss: -0.03892798688676622
          total_loss: 0.805724661383364
          vf_explained_var: -0.2976527810096741
          vf_loss: 0.8457556919143018
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_trained: 1320000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1320,36274.3,1320000,-71.577,-21.5,-100,715.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-10-22_05-50-33
  done: false
  episode_len_mean: 714.59
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.45899999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4397
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.760744530853557e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.06548579821246676
          entropy_coeff: 0.009999999999999998
          kl: 0.05544694584941296
          policy_loss: -0.04739362365669674
          total_loss: 0.626264376938343
          vf_explained_var: 0.045794159173965454
          vf_loss: 0.6743128538972492
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained: 132100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1321,36283.7,1321000,-71.459,-21.5,-100,714.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-10-22_05-50-44
  done: false
  episode_len_mean: 717.16
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.71599999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4399
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.641116796280336e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.7448838800191879
          entropy_coeff: 0.009999999999999998
          kl: 0.20032225572852794
          policy_loss: 0.09364283291829957
          total_loss: 0.6150075448883905
          vf_explained_var: -0.0058695473708212376
          vf_loss: 0.5288135411722276
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained: 13220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1322,36294.3,1322000,-71.716,-21.5,-100,717.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-10-22_05-50-56
  done: false
  episode_len_mean: 717.02
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.70199999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4400
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.461675194420505e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.5711638030078676
          entropy_coeff: 0.009999999999999998
          kl: 0.028651933364304542
          policy_loss: -0.07935851381884682
          total_loss: 0.6924893447094493
          vf_explained_var: -0.43998727202415466
          vf_loss: 0.7775594961777743
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained: 13230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1323,36306.9,1323000,-71.702,-21.5,-100,717.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-10-22_05-51-11
  done: false
  episode_len_mean: 711.58
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.15799999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4402
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2692512791630755e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.8051327384180493
          entropy_coeff: 0.009999999999999998
          kl: 0.03129984507288302
          policy_loss: 0.0968456111020512
          total_loss: 0.7864976114696927
          vf_explained_var: 0.05240384116768837
          vf_loss: 0.6977033231407404
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_trained: 1324000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1324,36321.5,1324000,-71.158,-21.5,-100,711.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-10-22_05-51-26
  done: false
  episode_len_mean: 705.18
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -70.51799999999946
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4404
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9038769187446145e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.8403894007205963
          entropy_coeff: 0.009999999999999998
          kl: 0.13696745654850764
          policy_loss: 0.12850535445743136
          total_loss: 0.7622618195083406
          vf_explained_var: 0.33228662610054016
          vf_loss: 0.6421603604995956
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained: 1325000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1325,36337.1,1325000,-70.518,-21.5,-100,705.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1326000
  custom_metrics: {}
  date: 2021-10-22_05-51-40
  done: false
  episode_len_mean: 702.06
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -70.20599999999946
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4405
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.85581537811692e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5629475060436461
          entropy_coeff: 0.009999999999999998
          kl: 0.026913046575907
          policy_loss: -0.06944333993726307
          total_loss: 0.5873254090547562
          vf_explained_var: 0.3351823091506958
          vf_loss: 0.6623982173804608
    num_agent_steps_sampled: 1326000
    num_agent_steps_trained: 1326000
    num_steps_sampled: 1326000
    num_steps_trained: 1326000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1326,36350.5,1326000,-70.206,-21.5,-100,702.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1327000
  custom_metrics: {}
  date: 2021-10-22_05-51-51
  done: false
  episode_len_mean: 696.08
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.6079999999995
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4407
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.28372306717538e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5952525956763162
          entropy_coeff: 0.009999999999999998
          kl: 0.011089487655133705
          policy_loss: 0.03480955676900016
          total_loss: 0.657140921552976
          vf_explained_var: 0.4006389081478119
          vf_loss: 0.6282838951744553
    num_agent_steps_sampled: 1327000
    num_agent_steps_trained: 1327000
    num_steps_sampled: 1327000
    num_steps_trained: 1327000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1327,36362.1,1327000,-69.608,-21.5,-100,696.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1328000
  custom_metrics: {}
  date: 2021-10-22_05-52-02
  done: false
  episode_len_mean: 694.63
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.46299999999952
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4408
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.28372306717538e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.40205469032128655
          entropy_coeff: 0.009999999999999998
          kl: 0.004648242847512781
          policy_loss: -0.03726307517952389
          total_loss: 0.6088437447945277
          vf_explained_var: 0.155405655503273
          vf_loss: 0.6501273586104314
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_steps_sampled: 1328000
    num_steps_trained: 1328000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1328,36373.1,1328000,-69.463,-21.5,-100,694.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1329000
  custom_metrics: {}
  date: 2021-10-22_05-52-13
  done: false
  episode_len_mean: 693.97
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.39699999999952
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4409
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.14186153358769e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.3302304599020216
          entropy_coeff: 0.009999999999999998
          kl: 0.006985546946735295
          policy_loss: -0.04545777870549096
          total_loss: 0.6199844194783105
          vf_explained_var: -0.00020657049026340246
          vf_loss: 0.6687444778521442
    num_agent_steps_sampled: 1329000
    num_agent_steps_trained: 1329000
    num_steps_sampled: 1329000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1329,36383.2,1329000,-69.397,-21.5,-100,693.97




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1330000
  custom_metrics: {}
  date: 2021-10-22_05-52-41
  done: false
  episode_len_mean: 692.2
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.21999999999953
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4410
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.14186153358769e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.1763850634296735
          entropy_coeff: 0.009999999999999998
          kl: 0.02105089791974232
          policy_loss: -0.06289524038632711
          total_loss: 0.6560074016451836
          vf_explained_var: 0.30111458897590637
          vf_loss: 0.7206664975059943
    num_agent_steps_sampled: 1330000
    num_agent_steps_trained: 1330000
    num_steps_sampled: 1330000
    num_steps_trained: 1330000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1330,36411.5,1330000,-69.22,-21.5,-100,692.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1331000
  custom_metrics: {}
  date: 2021-10-22_05-52-50
  done: false
  episode_len_mean: 692.5
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.24999999999953
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4412
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2127923003815357e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.8271733757522371
          entropy_coeff: 0.009999999999999998
          kl: 0.34134463826183986
          policy_loss: 0.11458496525883674
          total_loss: 0.5989668134186003
          vf_explained_var: -0.17785093188285828
          vf_loss: 0.4926535773039278
    num_agent_steps_sampled: 1331000
    num_agent_steps_trained: 1331000
    num_steps_sampled: 1331000
    num_steps_trained: 1331000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1331,36420.7,1331000,-69.25,-21.5,-100,692.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1332000
  custom_metrics: {}
  date: 2021-10-22_05-53-00
  done: false
  episode_len_mean: 692.34
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.23399999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4413
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.819188450572303e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.38948966471685303
          entropy_coeff: 0.009999999999999998
          kl: 0.036053065973178554
          policy_loss: 0.22803940425316493
          total_loss: 0.23720497141281763
          vf_explained_var: 0.13229848444461823
          vf_loss: 0.013060463651911252
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_steps_sampled: 1332000
    num_steps_trained: 133

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1332,36430.9,1332000,-69.234,-21.5,-100,692.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1333000
  custom_metrics: {}
  date: 2021-10-22_05-53-10
  done: false
  episode_len_mean: 691.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.15699999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4414
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.228782675858457e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5310781780216429
          entropy_coeff: 0.009999999999999998
          kl: 0.013359673350669413
          policy_loss: -0.04665409409337574
          total_loss: 0.7189867633912298
          vf_explained_var: 0.03930442035198212
          vf_loss: 0.7709516636690953
    num_agent_steps_sampled: 1333000
    num_agent_steps_trained: 1333000
    num_steps_sampled: 1333000
    num_steps_trained: 133300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1333,36441,1333000,-69.157,-21.5,-100,691.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1334000
  custom_metrics: {}
  date: 2021-10-22_05-53-21
  done: false
  episode_len_mean: 691.33
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.13299999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4415
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.228782675858457e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.49348542822731867
          entropy_coeff: 0.009999999999999998
          kl: 0.006101517461178061
          policy_loss: -0.05024259885152181
          total_loss: 0.7266475328140789
          vf_explained_var: 0.07007385045289993
          vf_loss: 0.7818249761644337
    num_agent_steps_sampled: 1334000
    num_agent_steps_trained: 1334000
    num_steps_sampled: 1334000
    num_steps_trained: 13340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1334,36451.1,1334000,-69.133,-21.5,-100,691.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1335000
  custom_metrics: {}
  date: 2021-10-22_05-53-30
  done: false
  episode_len_mean: 690.89
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.08899999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4416
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.228782675858457e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.4647378149959776
          entropy_coeff: 0.009999999999999998
          kl: 0.00395329853220182
          policy_loss: -0.04740597622262107
          total_loss: 0.740595339735349
          vf_explained_var: 0.05267854779958725
          vf_loss: 0.7926486774865124
    num_agent_steps_sampled: 1335000
    num_agent_steps_trained: 1335000
    num_steps_sampled: 1335000
    num_steps_trained: 1335000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1335,36460.3,1335000,-69.089,-21.5,-100,690.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1336000
  custom_metrics: {}
  date: 2021-10-22_05-53-40
  done: false
  episode_len_mean: 690.65
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.06499999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4417
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.38395690520604453
          entropy_coeff: 0.009999999999999998
          kl: 0.01007176992210227
          policy_loss: -0.0412942874762747
          total_loss: 0.722108413444625
          vf_explained_var: 0.028758713975548744
          vf_loss: 0.7672422715048823
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_steps_sampled: 1336000
    num_steps_trained: 133600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1336,36470.3,1336000,-69.065,-21.5,-100,690.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1337000
  custom_metrics: {}
  date: 2021-10-22_05-53-49
  done: false
  episode_len_mean: 690.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.01699999999954
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4418
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.44203343821896446
          entropy_coeff: 0.009999999999999998
          kl: 0.005458261406025776
          policy_loss: -0.05437956303358078
          total_loss: 0.7588682709468736
          vf_explained_var: 0.12158714234828949
          vf_loss: 0.8176681785326865
    num_agent_steps_sampled: 1337000
    num_agent_steps_trained: 1337000
    num_steps_sampled: 1337000
    num_steps_trained: 1337

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1337,36479.4,1337000,-69.017,-21.5,-100,690.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1338000
  custom_metrics: {}
  date: 2021-10-22_05-53-59
  done: false
  episode_len_mean: 690.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.01699999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4419
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5088362491793103
          entropy_coeff: 0.009999999999999998
          kl: 0.011329271582498752
          policy_loss: -0.057268034170071286
          total_loss: 0.7733636644151476
          vf_explained_var: -0.015504884533584118
          vf_loss: 0.8357200456265774
    num_agent_steps_sampled: 1338000
    num_agent_steps_trained: 1338000
    num_steps_sampled: 1338000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1338,36489.5,1338000,-69.017,-21.5,-100,690.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1339000
  custom_metrics: {}
  date: 2021-10-22_05-54-08
  done: false
  episode_len_mean: 691.31
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.13099999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4420
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5093076220817037
          entropy_coeff: 0.009999999999999998
          kl: 0.00561786014624488
          policy_loss: -0.0516910860935847
          total_loss: 0.7775223685635461
          vf_explained_var: -0.29106539487838745
          vf_loss: 0.8343065516091883
    num_agent_steps_sampled: 1339000
    num_agent_steps_trained: 1339000
    num_steps_sampled: 1339000
    num_steps_trained: 133900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1339,36498.8,1339000,-69.131,-21.5,-100,691.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1340000
  custom_metrics: {}
  date: 2021-10-22_05-54-18
  done: false
  episode_len_mean: 691.19
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.11899999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4421
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5581911285718282
          entropy_coeff: 0.009999999999999998
          kl: 0.006910903514258216
          policy_loss: -0.049872401687833995
          total_loss: 0.7709828073779742
          vf_explained_var: 0.1187458485364914
          vf_loss: 0.8264371106840878
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_steps_sampled: 1340000
    num_steps_trained: 13400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1340,36508.8,1340000,-69.119,-21.5,-100,691.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1341000
  custom_metrics: {}
  date: 2021-10-22_05-54-28
  done: false
  episode_len_mean: 691.03
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.10299999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4422
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5658572286367416
          entropy_coeff: 0.009999999999999998
          kl: 0.015719586276665608
          policy_loss: -0.06051229006714291
          total_loss: 0.7569695832000838
          vf_explained_var: 0.14945152401924133
          vf_loss: 0.8231404338704629
    num_agent_steps_sampled: 1341000
    num_agent_steps_trained: 1341000
    num_steps_sampled: 1341000
    num_steps_trained: 13410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1341,36518.6,1341000,-69.103,-21.5,-100,691.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1342000
  custom_metrics: {}
  date: 2021-10-22_05-54-38
  done: false
  episode_len_mean: 690.19
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.01899999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4423
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.2761035862068335
          entropy_coeff: 0.009999999999999998
          kl: 0.03866138389632472
          policy_loss: -0.06625107212199105
          total_loss: 0.7538472377591663
          vf_explained_var: 0.11701095104217529
          vf_loss: 0.8228593404249599
    num_agent_steps_sampled: 1342000
    num_agent_steps_trained: 1342000
    num_steps_sampled: 1342000
    num_steps_trained: 134200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1342,36528.8,1342000,-69.019,-21.5,-100,690.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1343000
  custom_metrics: {}
  date: 2021-10-22_05-54-50
  done: false
  episode_len_mean: 689.87
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.98699999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4424
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.421587006893841e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.6485533268915282
          entropy_coeff: 0.009999999999999998
          kl: 0.07074594389360256
          policy_loss: -0.05427774041891098
          total_loss: 0.7061041108436055
          vf_explained_var: 0.13533663749694824
          vf_loss: 0.7668673913486095
    num_agent_steps_sampled: 1343000
    num_agent_steps_trained: 1343000
    num_steps_sampled: 1343000
    num_steps_trained: 1343000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1343,36540,1343000,-68.987,-21.5,-100,689.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1344000
  custom_metrics: {}
  date: 2021-10-22_05-54-59
  done: false
  episode_len_mean: 689.19
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.91899999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4425
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.132380510340763e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.061266569648351935
          entropy_coeff: 0.009999999999999998
          kl: 0.29262066188329705
          policy_loss: -0.03298163712024689
          total_loss: 0.7863186000121964
          vf_explained_var: 0.10733029246330261
          vf_loss: 0.819912900051309
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_steps_sampled: 1344000
    num_steps_trained: 134400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1344,36549.3,1344000,-68.919,-21.5,-100,689.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1345000
  custom_metrics: {}
  date: 2021-10-22_05-55-08
  done: false
  episode_len_mean: 689.19
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.91899999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4426
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2198570765511142e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.6679955680751138
          entropy_coeff: 0.009999999999999998
          kl: 0.24341251597210578
          policy_loss: -0.0587876859638426
          total_loss: 0.5802811783221032
          vf_explained_var: 0.08552119880914688
          vf_loss: 0.6457488123081728
    num_agent_steps_sampled: 1345000
    num_agent_steps_trained: 1345000
    num_steps_sampled: 1345000
    num_steps_trained: 1345000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1345,36558.6,1345000,-68.919,-21.5,-100,689.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1346000
  custom_metrics: {}
  date: 2021-10-22_05-55-18
  done: false
  episode_len_mean: 689.18
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.91799999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4427
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.829785614826672e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.26367782089445324
          entropy_coeff: 0.009999999999999998
          kl: 0.010110775201473467
          policy_loss: -0.06514497134420606
          total_loss: 0.7677280836635165
          vf_explained_var: -0.05454586446285248
          vf_loss: 0.835509832094734
    num_agent_steps_sampled: 1346000
    num_agent_steps_trained: 1346000
    num_steps_sampled: 1346000
    num_steps_trained: 13460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1346,36567.9,1346000,-68.918,-21.5,-100,689.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1347000
  custom_metrics: {}
  date: 2021-10-22_05-55-27
  done: false
  episode_len_mean: 689.14
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.91399999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4428
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.829785614826672e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.3523465714520878
          entropy_coeff: 0.009999999999999998
          kl: 0.04816263140466819
          policy_loss: -0.06018380986319648
          total_loss: 0.6931529561678569
          vf_explained_var: -0.03288120776414871
          vf_loss: 0.756860235079916
    num_agent_steps_sampled: 1347000
    num_agent_steps_trained: 1347000
    num_steps_sampled: 1347000
    num_steps_trained: 1347000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1347,36577.2,1347000,-68.914,-21.5,-100,689.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1348000
  custom_metrics: {}
  date: 2021-10-22_05-55-36
  done: false
  episode_len_mean: 689.1
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.90999999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4429
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7446784222400077e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.14449187268813452
          entropy_coeff: 0.009999999999999998
          kl: 0.00789982481991933
          policy_loss: -0.06709945268101163
          total_loss: 0.5066728777355618
          vf_explained_var: 0.06284502148628235
          vf_loss: 0.5752172536785818
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_steps_sampled: 1348000
    num_steps_trained: 134800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1348,36586.7,1348000,-68.91,-21.5,-100,689.1


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1349000
  custom_metrics: {}
  date: 2021-10-22_05-55-46
  done: false
  episode_len_mean: 688.85
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.88499999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4430
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7446784222400077e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.03973385335670577
          entropy_coeff: 0.009999999999999998
          kl: 0.15947882913788491
          policy_loss: -0.05736562112967173
          total_loss: 0.43472252322567834
          vf_explained_var: 0.2967072129249573
          vf_loss: 0.49248549207631087
    num_agent_steps_sampled: 1349000
    num_agent_steps_trained: 1349000
    num_steps_sampled: 1349000
    num_steps_trained: 1349

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1349,36596.4,1349000,-68.885,-21.5,-100,688.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1350000
  custom_metrics: {}
  date: 2021-10-22_05-55-55
  done: false
  episode_len_mean: 688.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.81699999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4431
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.117017633360013e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.020785464249396077
          entropy_coeff: 0.009999999999999998
          kl: 0.002749093386007132
          policy_loss: -0.03357299119234085
          total_loss: 0.46519238667355645
          vf_explained_var: 0.0072354706935584545
          vf_loss: 0.4989732243224151
    num_agent_steps_sampled: 1350000
    num_agent_steps_trained: 1350000
    num_steps_sampled: 1350000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1350,36605.7,1350000,-68.817,-21.5,-100,688.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1351000
  custom_metrics: {}
  date: 2021-10-22_05-56-05
  done: false
  episode_len_mean: 688.21
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.82099999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4432
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0585088166800066e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.020822218603764972
          entropy_coeff: 0.009999999999999998
          kl: 0.011499385534540243
          policy_loss: -0.02242021949754821
          total_loss: 0.2674822770059109
          vf_explained_var: 0.19237083196640015
          vf_loss: 0.29011071620043366
    num_agent_steps_sampled: 1351000
    num_agent_steps_trained: 1351000
    num_steps_sampled: 1351000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1351,36614.8,1351000,-68.821,-21.5,-100,688.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1352000
  custom_metrics: {}
  date: 2021-10-22_05-56-14
  done: false
  episode_len_mean: 688.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.81699999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4433
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0585088166800066e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.011117314163129777
          entropy_coeff: 0.009999999999999998
          kl: 0.0015938099869130184
          policy_loss: -0.008864244694511096
          total_loss: 0.22735773680938615
          vf_explained_var: 0.10963596403598785
          vf_loss: 0.236333150363579
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_steps_sampled: 1352000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1352,36624.3,1352000,-68.817,-21.5,-100,688.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1353000
  custom_metrics: {}
  date: 2021-10-22_05-56-23
  done: false
  episode_len_mean: 688.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.81699999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4434
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0292544083400033e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.003956542590943476
          entropy_coeff: 0.009999999999999998
          kl: 0.00011563396017657052
          policy_loss: -0.012464761485656102
          total_loss: 0.35192832007176345
          vf_explained_var: -0.003381183370947838
          vf_loss: 0.3644326616240303
    num_agent_steps_sampled: 1353000
    num_agent_steps_trained: 1353000
    num_steps_sampled: 1353000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1353,36633.1,1353000,-68.817,-21.5,-100,688.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1354000
  custom_metrics: {}
  date: 2021-10-22_05-56-32
  done: false
  episode_len_mean: 688.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.81699999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4435
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.146272041700017e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.04086825663689524
          entropy_coeff: 0.009999999999999998
          kl: 0.007600910999835748
          policy_loss: -0.026603321565522087
          total_loss: 0.2332065870364507
          vf_explained_var: -0.0022205908317118883
          vf_loss: 0.2602185915120774
    num_agent_steps_sampled: 1354000
    num_agent_steps_trained: 1354000
    num_steps_sampled: 1354000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1354,36642.5,1354000,-68.817,-21.5,-100,688.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1355000
  custom_metrics: {}
  date: 2021-10-22_05-56-42
  done: false
  episode_len_mean: 686.98
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.69799999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4437
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.146272041700017e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.42621349170804024
          entropy_coeff: 0.009999999999999998
          kl: 0.03737931751363577
          policy_loss: 0.1679687035580476
          total_loss: 0.3939194513691796
          vf_explained_var: -0.027063971385359764
          vf_loss: 0.2302128806192842
    num_agent_steps_sampled: 1355000
    num_agent_steps_trained: 1355000
    num_steps_sampled: 1355000
    num_steps_trained: 135500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1355,36652.3,1355000,-68.698,-21.5,-100,686.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1356000
  custom_metrics: {}
  date: 2021-10-22_05-56-53
  done: false
  episode_len_mean: 685.67
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.56699999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4438
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.719408062550021e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.09742940465609233
          entropy_coeff: 0.009999999999999998
          kl: 0.3034085760823813
          policy_loss: -0.06478126860327191
          total_loss: 0.32388134913312067
          vf_explained_var: 0.38698768615722656
          vf_loss: 0.3896369159946011
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_steps_sampled: 1356000
    num_steps_trained: 135600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1356,36663.4,1356000,-68.567,-21.5,-100,685.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1357000
  custom_metrics: {}
  date: 2021-10-22_05-57-02
  done: false
  episode_len_mean: 685.32
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.53199999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4439
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1579112093825033e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.05622299193508095
          entropy_coeff: 0.009999999999999998
          kl: 0.0018530780173744432
          policy_loss: -0.0953400981095102
          total_loss: 0.3708267766568396
          vf_explained_var: 0.00860251858830452
          vf_loss: 0.46672909984158145
    num_agent_steps_sampled: 1357000
    num_agent_steps_trained: 1357000
    num_steps_sampled: 1357000
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1357,36672.3,1357000,-68.532,-21.5,-100,685.32




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1358000
  custom_metrics: {}
  date: 2021-10-22_05-57-29
  done: false
  episode_len_mean: 683.87
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.3869999999996
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4440
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.789556046912517e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.004824013154332837
          entropy_coeff: 0.009999999999999998
          kl: 0.0011821007981470011
          policy_loss: 0.008805075287818908
          total_loss: 0.45701288746462926
          vf_explained_var: 0.13532231748104095
          vf_loss: 0.4482560498432981
    num_agent_steps_sampled: 1358000
    num_agent_steps_trained: 1358000
    num_steps_sampled: 1358000
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1358,36699.4,1358000,-68.387,-21.5,-100,683.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1359000
  custom_metrics: {}
  date: 2021-10-22_05-57-38
  done: false
  episode_len_mean: 683.95
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.39499999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4441
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8947780234562584e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.021846274639635035
          entropy_coeff: 0.009999999999999998
          kl: 0.0007176611408519908
          policy_loss: -0.07027002887593375
          total_loss: 0.5615553468465805
          vf_explained_var: -0.08092479407787323
          vf_loss: 0.6320438446218355
    num_agent_steps_sampled: 1359000
    num_agent_steps_trained: 1359000
    num_steps_sampled: 1359000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1359,36708.5,1359000,-68.395,-21.5,-100,683.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1360000
  custom_metrics: {}
  date: 2021-10-22_05-57-48
  done: false
  episode_len_mean: 683.95
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.39499999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4442
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4473890117281292e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.004277377640311089
          entropy_coeff: 0.009999999999999998
          kl: 0.0001850833617346491
          policy_loss: -0.029827357166343264
          total_loss: 0.5869691198070844
          vf_explained_var: 0.06010203808546066
          vf_loss: 0.6168392453446156
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_steps_sampled: 1360000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1360,36717.8,1360000,-68.395,-21.5,-100,683.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1361000
  custom_metrics: {}
  date: 2021-10-22_05-57-57
  done: false
  episode_len_mean: 683.99
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.39899999999959
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4443
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.236945058640646e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.014863142585899268
          entropy_coeff: 0.009999999999999998
          kl: 0.0009190422953101536
          policy_loss: -0.070631545699305
          total_loss: 0.6496153548359871
          vf_explained_var: -0.05662037432193756
          vf_loss: 0.7203955478345354
    num_agent_steps_sampled: 1361000
    num_agent_steps_trained: 1361000
    num_steps_sampled: 1361000
    num_steps_trained: 1361

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1361,36726.9,1361000,-68.399,-21.5,-100,683.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1362000
  custom_metrics: {}
  date: 2021-10-22_05-58-06
  done: false
  episode_len_mean: 683.99
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.39899999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4444
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.618472529320323e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.034549873187077335
          entropy_coeff: 0.009999999999999998
          kl: 0.01858020956518265
          policy_loss: -0.036943520274427204
          total_loss: 0.6510815373725362
          vf_explained_var: 0.045523881912231445
          vf_loss: 0.6883705609478057
    num_agent_steps_sampled: 1362000
    num_agent_steps_trained: 1362000
    num_steps_sampled: 1362000
    num_steps_trained: 136

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1362,36736.2,1362000,-68.399,-21.5,-100,683.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1363000
  custom_metrics: {}
  date: 2021-10-22_05-58-16
  done: false
  episode_len_mean: 684.11
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.41099999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4445
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.618472529320323e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.0032579321808750843
          entropy_coeff: 0.009999999999999998
          kl: 4.574184352744889e-05
          policy_loss: -0.04725147270494037
          total_loss: 0.7272348331080543
          vf_explained_var: 0.01258266530930996
          vf_loss: 0.7745188839216199
    num_agent_steps_sampled: 1363000
    num_agent_steps_trained: 1363000
    num_steps_sampled: 1363000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1363,36745.6,1363000,-68.411,-21.5,-100,684.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1364000
  custom_metrics: {}
  date: 2021-10-22_05-58-25
  done: false
  episode_len_mean: 684.62
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.46199999999958
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4446
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8092362646601615e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.003104773145686421
          entropy_coeff: 0.009999999999999998
          kl: 0.00027862220067406375
          policy_loss: -0.05124746445152495
          total_loss: 0.7525486181179682
          vf_explained_var: -0.002790347672998905
          vf_loss: 0.8038271336816252
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_steps_sampled: 1364000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1364,36754.5,1364000,-68.462,-21.5,-100,684.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1365000
  custom_metrics: {}
  date: 2021-10-22_05-58-34
  done: false
  episode_len_mean: 685.49
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.54899999999957
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4447
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.046181323300807e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.01083348201096265
          entropy_coeff: 0.009999999999999998
          kl: 0.0003115059391332855
          policy_loss: -0.06520309746265411
          total_loss: 0.7419837729798423
          vf_explained_var: -0.024232562631368637
          vf_loss: 0.8072951956548624
    num_agent_steps_sampled: 1365000
    num_agent_steps_trained: 1365000
    num_steps_sampled: 1365000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1365,36764,1365000,-68.549,-21.5,-100,685.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1366000
  custom_metrics: {}
  date: 2021-10-22_05-58-43
  done: false
  episode_len_mean: 685.93
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.59299999999956
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4448
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.523090661650404e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.00817166235517814
          entropy_coeff: 0.009999999999999998
          kl: 0.0015110315512479116
          policy_loss: -0.04653474870655272
          total_loss: 0.7383091325561205
          vf_explained_var: 0.02581297792494297
          vf_loss: 0.784925583915578
    num_agent_steps_sampled: 1366000
    num_agent_steps_trained: 1366000
    num_steps_sampled: 1366000
    num_steps_trained: 13660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1366,36773.1,1366000,-68.593,-21.5,-100,685.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1367000
  custom_metrics: {}
  date: 2021-10-22_05-58-53
  done: false
  episode_len_mean: 688.29
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -68.82899999999955
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4449
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.261545330825202e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0036438257821525136
          entropy_coeff: 0.009999999999999998
          kl: 0.00010137418382686707
          policy_loss: -0.049823427697022755
          total_loss: 0.7920639832814534
          vf_explained_var: 0.01849929243326187
          vf_loss: 0.8419238346732325
    num_agent_steps_sampled: 1367000
    num_agent_steps_trained: 1367000
    num_steps_sampled: 1367000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1367,36782.8,1367000,-68.829,-21.5,-100,688.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1368000
  custom_metrics: {}
  date: 2021-10-22_05-59-02
  done: false
  episode_len_mean: 694.54
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.45399999999952
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4450
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.130772665412601e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0037956123201486965
          entropy_coeff: 0.009999999999999998
          kl: 0.0005953265840704855
          policy_loss: -0.05303869048754374
          total_loss: 0.8181760763128598
          vf_explained_var: 0.00039596029091626406
          vf_loss: 0.871252717781398
    num_agent_steps_sampled: 1368000
    num_agent_steps_trained: 1368000
    num_steps_sampled: 1368000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1368,36791.7,1368000,-69.454,-21.5,-100,694.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1369000
  custom_metrics: {}
  date: 2021-10-22_05-59-11
  done: false
  episode_len_mean: 699.21
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -69.92099999999951
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4451
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.653863327063005e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.008262419396649219
          entropy_coeff: 0.009999999999999998
          kl: 0.000419501084852985
          policy_loss: -0.05908544974194633
          total_loss: 0.8135133177042008
          vf_explained_var: -0.0068173096515238285
          vf_loss: 0.8726813942193985
    num_agent_steps_sampled: 1369000
    num_agent_steps_trained: 1369000
    num_steps_sampled: 1369000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1369,36801.3,1369000,-69.921,-21.5,-100,699.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1370000
  custom_metrics: {}
  date: 2021-10-22_05-59-20
  done: false
  episode_len_mean: 703.73
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -70.3729999999995
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4452
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8269316635315023e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.023893517498961753
          entropy_coeff: 0.009999999999999998
          kl: 0.0007168701166378996
          policy_loss: -0.06934930301374859
          total_loss: 0.7968441479735904
          vf_explained_var: -0.022171255201101303
          vf_loss: 0.8664323697487514
    num_agent_steps_sampled: 1370000
    num_agent_steps_trained: 1370000
    num_steps_sampled: 1370000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1370,36810,1370000,-70.373,-21.5,-100,703.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1371000
  custom_metrics: {}
  date: 2021-10-22_05-59-30
  done: false
  episode_len_mean: 709.09
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -70.90899999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4453
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4134658317657512e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.00514207125783691
          entropy_coeff: 0.009999999999999998
          kl: 0.001016577109080375
          policy_loss: -0.04682100017865499
          total_loss: 0.7752880599763659
          vf_explained_var: 0.04302923381328583
          vf_loss: 0.8221604828205373
    num_agent_steps_sampled: 1371000
    num_agent_steps_trained: 1371000
    num_steps_sampled: 1371000
    num_steps_trained: 1371

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1371,36819.7,1371000,-70.909,-21.5,-100,709.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1372000
  custom_metrics: {}
  date: 2021-10-22_05-59-39
  done: false
  episode_len_mean: 714.53
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.45299999999946
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4454
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.067329158828756e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.007628893678904408
          entropy_coeff: 0.009999999999999998
          kl: 0.00026789810617026987
          policy_loss: -0.05296605858537886
          total_loss: 0.8338307677043809
          vf_explained_var: 0.0008794539608061314
          vf_loss: 0.8868731226151189
    num_agent_steps_sampled: 1372000
    num_agent_steps_trained: 1372000
    num_steps_sampled: 1372000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1372,36828.5,1372000,-71.453,-21.5,-100,714.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1373000
  custom_metrics: {}
  date: 2021-10-22_05-59-48
  done: false
  episode_len_mean: 718.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -71.81699999999944
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4455
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.533664579414378e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.013000221701804548
          entropy_coeff: 0.009999999999999998
          kl: 0.00017837417450715468
          policy_loss: -0.055332382851176795
          total_loss: 0.8335507326655918
          vf_explained_var: 0.005550771486014128
          vf_loss: 0.889013079346882
    num_agent_steps_sampled: 1373000
    num_agent_steps_trained: 1373000
    num_steps_sampled: 1373000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1373,36837.9,1373000,-71.817,-21.5,-100,718.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1374000
  custom_metrics: {}
  date: 2021-10-22_05-59-57
  done: false
  episode_len_mean: 724.69
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -72.46899999999943
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4456
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.766832289707189e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.011557322664238099
          entropy_coeff: 0.009999999999999998
          kl: 0.001578642479815581
          policy_loss: -0.05401126394669215
          total_loss: 0.8389413005775875
          vf_explained_var: 0.005323245655745268
          vf_loss: 0.8930681398345365
    num_agent_steps_sampled: 1374000
    num_agent_steps_trained: 1374000
    num_steps_sampled: 1374000
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1374,36847,1374000,-72.469,-21.5,-100,724.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1375000
  custom_metrics: {}
  date: 2021-10-22_06-00-06
  done: false
  episode_len_mean: 726.69
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -72.66899999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4457
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.834161448535945e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.004453646544263595
          entropy_coeff: 0.009999999999999998
          kl: 0.0005699309991376947
          policy_loss: -0.0533438449104627
          total_loss: 0.8430260048972236
          vf_explained_var: 0.012910525314509869
          vf_loss: 0.896414395628704
    num_agent_steps_sampled: 1375000
    num_agent_steps_trained: 1375000
    num_steps_sampled: 1375000
    num_steps_trained: 1375

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1375,36856.1,1375000,-72.669,-21.5,-100,726.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1376000
  custom_metrics: {}
  date: 2021-10-22_06-00-16
  done: false
  episode_len_mean: 728.45
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -72.84499999999942
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4458
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4170807242679724e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.007674536977234917
          entropy_coeff: 0.009999999999999998
          kl: 0.00017756538464014742
          policy_loss: -0.05607302453782823
          total_loss: 0.8600453644990921
          vf_explained_var: -0.00010789301450131461
          vf_loss: 0.9161951293754909
    num_agent_steps_sampled: 1376000
    num_agent_steps_trained: 1376000
    num_steps_sampled: 1376000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1376,36865.4,1376000,-72.845,-21.5,-100,728.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1377000
  custom_metrics: {}
  date: 2021-10-22_06-00-25
  done: false
  episode_len_mean: 729.53
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -72.9529999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4459
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2085403621339862e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.002874192448022465
          entropy_coeff: 0.009999999999999998
          kl: 0.0005078886076092485
          policy_loss: -0.05439280586110221
          total_loss: 0.8437629613611434
          vf_explained_var: 0.0027520207222551107
          vf_loss: 0.8981845053740674
    num_agent_steps_sampled: 1377000
    num_agent_steps_trained: 1377000
    num_steps_sampled: 1377000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1377,36874.3,1377000,-72.953,-21.5,-100,729.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1378000
  custom_metrics: {}
  date: 2021-10-22_06-00-34
  done: false
  episode_len_mean: 731.32
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.1319999999994
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4460
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1042701810669931e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.002202885184669867
          entropy_coeff: 0.009999999999999998
          kl: 3.348831824388807e-05
          policy_loss: -0.05354680021603902
          total_loss: 0.8355906065967348
          vf_explained_var: 0.0016094744205474854
          vf_loss: 0.8891594322812226
    num_agent_steps_sampled: 1378000
    num_agent_steps_trained: 1378000
    num_steps_sampled: 1378000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1378,36883.8,1378000,-73.132,-21.5,-100,731.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1379000
  custom_metrics: {}
  date: 2021-10-22_06-00-43
  done: false
  episode_len_mean: 732.76
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.27599999999937
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4461
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5213509053349655e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0034400445305638842
          entropy_coeff: 0.009999999999999998
          kl: 6.0177941878371124e-05
          policy_loss: -0.05331254435910119
          total_loss: 0.8271693251199193
          vf_explained_var: 0.0016178899677470326
          vf_loss: 0.8805162571991484
    num_agent_steps_sampled: 1379000
    num_agent_steps_trained: 1379000
    num_steps_sampled: 1379000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1379,36892.7,1379000,-73.276,-21.5,-100,732.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1380000
  custom_metrics: {}
  date: 2021-10-22_06-00-53
  done: false
  episode_len_mean: 735.16
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.51599999999937
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4462
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7606754526674827e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0049388398468080495
          entropy_coeff: 0.009999999999999998
          kl: 0.00012450105216548458
          policy_loss: -0.05322796851396561
          total_loss: 0.8196348874105348
          vf_explained_var: 0.0017450670711696148
          vf_loss: 0.8729122518665261
    num_agent_steps_sampled: 1380000
    num_agent_steps_trained: 1380000
    num_steps_sampled: 1380000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1380,36902.3,1380000,-73.516,-21.5,-100,735.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1381000
  custom_metrics: {}
  date: 2021-10-22_06-01-02
  done: false
  episode_len_mean: 735.94
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.59399999999935
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4463
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3803377263337414e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.012663860930155755
          entropy_coeff: 0.009999999999999998
          kl: 0.0013289500919691636
          policy_loss: -0.06249621253874567
          total_loss: 0.7955595178736581
          vf_explained_var: -0.0038972655311226845
          vf_loss: 0.858182377823525
    num_agent_steps_sampled: 1381000
    num_agent_steps_trained: 1381000
    num_steps_sampled: 1381000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1381,36911.3,1381000,-73.594,-21.5,-100,735.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1382000
  custom_metrics: {}
  date: 2021-10-22_06-01-11
  done: false
  episode_len_mean: 737.29
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.72899999999935
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4464
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.901688631668707e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.009499452144761259
          entropy_coeff: 0.009999999999999998
          kl: 0.0003415510397126066
          policy_loss: -0.05192302167415619
          total_loss: 0.792176026933723
          vf_explained_var: 0.012597729451954365
          vf_loss: 0.8441940571698877
    num_agent_steps_sampled: 1382000
    num_agent_steps_trained: 1382000
    num_steps_sampled: 1382000
    num_steps_trained: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1382,36920.9,1382000,-73.729,-21.5,-100,737.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1383000
  custom_metrics: {}
  date: 2021-10-22_06-01-20
  done: false
  episode_len_mean: 739.93
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -73.99299999999934
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4465
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4508443158343534e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.012991445993409595
          entropy_coeff: 0.009999999999999998
          kl: 0.0002148752966585516
          policy_loss: -0.055775334437688194
          total_loss: 0.8041801551977793
          vf_explained_var: -0.001031465013511479
          vf_loss: 0.8600854102315174
    num_agent_steps_sampled: 1383000
    num_agent_steps_trained: 1383000
    num_steps_sampled: 1383000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1383,36929.7,1383000,-73.993,-21.5,-100,739.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1384000
  custom_metrics: {}
  date: 2021-10-22_06-01-30
  done: false
  episode_len_mean: 741.77
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -74.17699999999932
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4466
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7254221579171767e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.018275131475335608
          entropy_coeff: 0.009999999999999998
          kl: 0.0002670122599093272
          policy_loss: -0.05911214583449893
          total_loss: 0.7884262949228287
          vf_explained_var: -0.0006886959308758378
          vf_loss: 0.8477212004777458
    num_agent_steps_sampled: 1384000
    num_agent_steps_trained: 1384000
    num_steps_sampled: 1384000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1384,36939.4,1384000,-74.177,-21.5,-100,741.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1385000
  custom_metrics: {}
  date: 2021-10-22_06-01-39
  done: false
  episode_len_mean: 743.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -74.35699999999932
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4467
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.627110789585884e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.013966526797351738
          entropy_coeff: 0.009999999999999998
          kl: 0.0003230360675194493
          policy_loss: -0.05336735745271047
          total_loss: 0.7931813119186295
          vf_explained_var: 0.008500714786350727
          vf_loss: 0.8466883360925648
    num_agent_steps_sampled: 1385000
    num_agent_steps_trained: 1385000
    num_steps_sampled: 1385000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1385,36948.3,1385000,-74.357,-21.5,-100,743.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1386000
  custom_metrics: {}
  date: 2021-10-22_06-01-48
  done: false
  episode_len_mean: 745.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -74.5569999999993
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4468
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.313555394792942e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.008239218353992328
          entropy_coeff: 0.009999999999999998
          kl: 0.001405606487604005
          policy_loss: -0.05260914978053835
          total_loss: 0.7907269221213129
          vf_explained_var: 0.024133317172527313
          vf_loss: 0.8434184700043665
    num_agent_steps_sampled: 1386000
    num_agent_steps_trained: 1386000
    num_steps_sampled: 1386000
    num_steps_trained: 1386

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1386,36957.7,1386000,-74.557,-21.5,-100,745.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1387000
  custom_metrics: {}
  date: 2021-10-22_06-01-57
  done: false
  episode_len_mean: 747.05
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -74.7049999999993
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4469
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.156777697396471e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.003652524756681588
          entropy_coeff: 0.009999999999999998
          kl: 0.0002012977513954893
          policy_loss: -0.053371721009413405
          total_loss: 0.8243184505237473
          vf_explained_var: 0.0025774736423045397
          vf_loss: 0.8777266941550705
    num_agent_steps_sampled: 1387000
    num_agent_steps_trained: 1387000
    num_steps_sampled: 1387000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1387,36966.7,1387000,-74.705,-21.5,-100,747.05




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1388000
  custom_metrics: {}
  date: 2021-10-22_06-02-23
  done: false
  episode_len_mean: 751.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -75.1569999999993
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4470
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0783888486982354e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.0016630245592548615
          entropy_coeff: 0.009999999999999998
          kl: 0.00037908504925579327
          policy_loss: -0.01578004914853308
          total_loss: 0.33865931754310924
          vf_explained_var: 0.0006806757883168757
          vf_loss: 0.3544559892558027
    num_agent_steps_sampled: 1388000
    num_agent_steps_trained: 1388000
    num_steps_sampled: 1388000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1388,36992.7,1388000,-75.157,-21.5,-100,751.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1389000
  custom_metrics: {}
  date: 2021-10-22_06-02-34
  done: false
  episode_len_mean: 758.3
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -75.82999999999927
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4471
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.391944243491177e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.007458338557302745
          entropy_coeff: 0.009999999999999998
          kl: 0.0003643099373984416
          policy_loss: -0.017596938129928377
          total_loss: 0.3442719865176413
          vf_explained_var: 1.2452072951418813e-05
          vf_loss: 0.3619435066596553
    num_agent_steps_sampled: 1389000
    num_agent_steps_trained: 1389000
    num_steps_sampled: 1389000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1389,37003.6,1389000,-75.83,-21.5,-100,758.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1390000
  custom_metrics: {}
  date: 2021-10-22_06-02-43
  done: false
  episode_len_mean: 765.27
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -76.52699999999926
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4472
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6959721217455886e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0020694480487792235
          entropy_coeff: 0.009999999999999998
          kl: 9.012621892217845e-05
          policy_loss: -0.016312546283006667
          total_loss: 0.35069957656992806
          vf_explained_var: 0.0012054145336151123
          vf_loss: 0.36703281756684497
    num_agent_steps_sampled: 1390000
    num_agent_steps_trained: 1390000
    num_steps_sampled: 1390000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1390,37012.7,1390000,-76.527,-21.5,-100,765.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1391000
  custom_metrics: {}
  date: 2021-10-22_06-02-53
  done: false
  episode_len_mean: 771.96
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -77.19599999999924
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4473
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3479860608727943e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0018025019675001709
          entropy_coeff: 0.009999999999999998
          kl: 0.0004854437018448667
          policy_loss: -0.016427932845221625
          total_loss: 0.3599756633241971
          vf_explained_var: 0.00038088031578809023
          vf_loss: 0.3764216207032506
    num_agent_steps_sampled: 1391000
    num_agent_steps_trained: 1391000
    num_steps_sampled: 1391000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1391,37022.3,1391000,-77.196,-21.5,-100,771.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1392000
  custom_metrics: {}
  date: 2021-10-22_06-03-02
  done: false
  episode_len_mean: 778.64
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -77.86399999999922
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4474
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.739930304363972e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.0067881540346813075
          entropy_coeff: 0.009999999999999998
          kl: 0.0001445380992329505
          policy_loss: -0.03104275061438481
          total_loss: 0.34871461701889833
          vf_explained_var: -0.001193531323224306
          vf_loss: 0.37982523219543507
    num_agent_steps_sampled: 1392000
    num_agent_steps_trained: 1392000
    num_steps_sampled: 1392000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1392,37031.4,1392000,-77.864,-21.5,-100,778.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1393000
  custom_metrics: {}
  date: 2021-10-22_06-03-12
  done: false
  episode_len_mean: 785.32
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -78.5319999999992
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4475
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.369965152181986e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.001726294315368351
          entropy_coeff: 0.009999999999999998
          kl: 0.00018018718875758118
          policy_loss: -0.008463666215538979
          total_loss: 0.3563779480341408
          vf_explained_var: 0.011488236486911774
          vf_loss: 0.3648588625950247
    num_agent_steps_sampled: 1393000
    num_agent_steps_trained: 1393000
    num_steps_sampled: 1393000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1393,37040.9,1393000,-78.532,-21.5,-100,785.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1394000
  custom_metrics: {}
  date: 2021-10-22_06-03-20
  done: false
  episode_len_mean: 792.37
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -79.23699999999918
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4476
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.684982576090993e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.026322208070066862
          entropy_coeff: 0.009999999999999998
          kl: 0.0351763941683127
          policy_loss: -0.017412593132919735
          total_loss: 0.3702914939986335
          vf_explained_var: 8.55949183460325e-05
          vf_loss: 0.3879673006669489
    num_agent_steps_sampled: 1394000
    num_agent_steps_trained: 1394000
    num_steps_sampled: 1394000
    num_steps_trained: 1394

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1394,37049.8,1394000,-79.237,-21.5,-100,792.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1395000
  custom_metrics: {}
  date: 2021-10-22_06-03-30
  done: false
  episode_len_mean: 799.65
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -79.96499999999918
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4477
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.527473864136489e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.009331626961971375
          entropy_coeff: 0.009999999999999998
          kl: 0.00034313260987131725
          policy_loss: -0.020791749656200408
          total_loss: 0.36700930028325984
          vf_explained_var: 9.645157115301117e-05
          vf_loss: 0.38789436374823305
    num_agent_steps_sampled: 1395000
    num_agent_steps_trained: 1395000
    num_steps_sampled: 1395000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1395,37059.2,1395000,-79.965,-21.5,-100,799.65


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1396000
  custom_metrics: {}
  date: 2021-10-22_06-03-39
  done: false
  episode_len_mean: 806.62
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -80.66199999999917
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4478
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2637369320682444e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.0018314956140885544
          entropy_coeff: 0.009999999999999998
          kl: 0.00035319322905747055
          policy_loss: -0.01765378870897823
          total_loss: 0.3704174441595872
          vf_explained_var: 0.002007003640756011
          vf_loss: 0.3880895421727069
    num_agent_steps_sampled: 1396000
    num_agent_steps_trained: 1396000
    num_steps_sampled: 1396000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1396,37068,1396000,-80.662,-21.5,-100,806.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1397000
  custom_metrics: {}
  date: 2021-10-22_06-03-48
  done: false
  episode_len_mean: 813.83
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -81.38299999999914
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4479
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.318684660341222e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.002116276555009083
          entropy_coeff: 0.009999999999999998
          kl: 0.00019971980303585085
          policy_loss: -0.01737408604886797
          total_loss: 0.37686259374022485
          vf_explained_var: 0.00024471813230775297
          vf_loss: 0.3942578525036677
    num_agent_steps_sampled: 1397000
    num_agent_steps_trained: 1397000
    num_steps_sampled: 1397000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1397,37077.6,1397000,-81.383,-21.5,-100,813.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1398000
  custom_metrics: {}
  date: 2021-10-22_06-03-57
  done: false
  episode_len_mean: 821.66
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -82.16599999999913
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4480
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.159342330170611e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.004538412346543434
          entropy_coeff: 0.009999999999999998
          kl: 9.366342644515759e-05
          policy_loss: -0.017890370388825735
          total_loss: 0.37600326190392175
          vf_explained_var: 0.0005523966392502189
          vf_loss: 0.3939390242371398
    num_agent_steps_sampled: 1398000
    num_agent_steps_trained: 1398000
    num_steps_sampled: 1398000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1398,37086.6,1398000,-82.166,-21.5,-100,821.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1399000
  custom_metrics: {}
  date: 2021-10-22_06-04-07
  done: false
  episode_len_mean: 829.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -82.91699999999912
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4481
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5796711650853055e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.009536229866919004
          entropy_coeff: 0.009999999999999998
          kl: 0.0007690695588656008
          policy_loss: -0.0176974103682571
          total_loss: 0.37934062381585437
          vf_explained_var: 0.0016768448986113071
          vf_loss: 0.3971334033441154
    num_agent_steps_sampled: 1399000
    num_agent_steps_trained: 1399000
    num_steps_sampled: 1399000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1399,37095.9,1399000,-82.917,-21.5,-100,829.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1400000
  custom_metrics: {}
  date: 2021-10-22_06-04-16
  done: false
  episode_len_mean: 836.74
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -83.67399999999911
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4482
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.898355825426527e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.013420206476520332
          entropy_coeff: 0.009999999999999998
          kl: 0.0005945215806037475
          policy_loss: -0.017817749828100204
          total_loss: 0.3853545149167379
          vf_explained_var: 0.0014187846099957824
          vf_loss: 0.4033064656755111
    num_agent_steps_sampled: 1400000
    num_agent_steps_trained: 1400000
    num_steps_sampled: 1400000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1400,37105,1400000,-83.674,-21.5,-100,836.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1401000
  custom_metrics: {}
  date: 2021-10-22_06-04-25
  done: false
  episode_len_mean: 844.3
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -84.4299999999991
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4483
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.949177912713264e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.00311268907907005
          entropy_coeff: 0.009999999999999998
          kl: 0.0013348657772039294
          policy_loss: -0.02692972558240096
          total_loss: 0.3779629414487216
          vf_explained_var: -0.001601567491889
          vf_loss: 0.40492380067282485
    num_agent_steps_sampled: 1401000
    num_agent_steps_trained: 1401000
    num_steps_sampled: 1401000
    num_steps_trained: 140100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1401,37114.1,1401000,-84.43,-21.5,-100,844.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1402000
  custom_metrics: {}
  date: 2021-10-22_06-04-36
  done: false
  episode_len_mean: 852.08
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -85.20799999999909
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4484
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.974588956356632e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.004407537650937835
          entropy_coeff: 0.009999999999999998
          kl: 9.364721787271533e-05
          policy_loss: -0.01771181896328926
          total_loss: 0.37877980379594695
          vf_explained_var: 0.0005948165780864656
          vf_loss: 0.3965356962132824
    num_agent_steps_sampled: 1402000
    num_agent_steps_trained: 1402000
    num_steps_sampled: 1402000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1402,37124.8,1402000,-85.208,-21.5,-100,852.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1403000
  custom_metrics: {}
  date: 2021-10-22_06-04-46
  done: false
  episode_len_mean: 859.71
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -85.97099999999908
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4485
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.87294478178316e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.003530550371053525
          entropy_coeff: 0.009999999999999998
          kl: 0.0006011665641172688
          policy_loss: -0.017852750089433458
          total_loss: 0.38127644095155927
          vf_explained_var: 7.012486457824707e-05
          vf_loss: 0.3991644968530939
    num_agent_steps_sampled: 1403000
    num_agent_steps_trained: 1403000
    num_steps_sampled: 1403000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1403,37134.6,1403000,-85.971,-21.5,-100,859.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1404000
  custom_metrics: {}
  date: 2021-10-22_06-04-55
  done: false
  episode_len_mean: 867.39
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -86.73899999999905
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4486
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.93647239089158e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.00499852777704493
          entropy_coeff: 0.009999999999999998
          kl: 0.0004378245209905565
          policy_loss: -0.017903870923651588
          total_loss: 0.38214112520217897
          vf_explained_var: 0.0002987537009175867
          vf_loss: 0.4000949992487828
    num_agent_steps_sampled: 1404000
    num_agent_steps_trained: 1404000
    num_steps_sampled: 1404000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1404,37143.7,1404000,-86.739,-21.5,-100,867.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1405000
  custom_metrics: {}
  date: 2021-10-22_06-05-04
  done: false
  episode_len_mean: 875.03
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -87.50299999999903
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4487
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.46823619544579e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.004830173945326048
          entropy_coeff: 0.009999999999999998
          kl: 6.070503684538209e-05
          policy_loss: -0.017968784107102287
          total_loss: 0.3836127976576487
          vf_explained_var: 0.0003939125163014978
          vf_loss: 0.40162989034757224
    num_agent_steps_sampled: 1405000
    num_agent_steps_trained: 1405000
    num_steps_sampled: 1405000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1405,37153.2,1405000,-87.503,-21.5,-100,875.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1406000
  custom_metrics: {}
  date: 2021-10-22_06-05-13
  done: false
  episode_len_mean: 882.51
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -88.25099999999901
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4488
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.234118097722895e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.004790632543477437
          entropy_coeff: 0.009999999999999998
          kl: 0.00014657821100816864
          policy_loss: -0.018168133000532785
          total_loss: 0.38383594246374236
          vf_explained_var: 0.0003062440373469144
          vf_loss: 0.40205198763578665
    num_agent_steps_sampled: 1406000
    num_agent_steps_trained: 1406000
    num_steps_sampled: 1406000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1406,37162.1,1406000,-88.251,-21.5,-100,882.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1407000
  custom_metrics: {}
  date: 2021-10-22_06-05-23
  done: false
  episode_len_mean: 890.05
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -89.004999999999
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4489
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.170590488614475e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.011584603032679297
          entropy_coeff: 0.009999999999999998
          kl: 0.0008059704380157098
          policy_loss: -0.01836621446741952
          total_loss: 0.3835265677836206
          vf_explained_var: 0.0014043886912986636
          vf_loss: 0.402008637732312
    num_agent_steps_sampled: 1407000
    num_agent_steps_trained: 1407000
    num_steps_sampled: 1407000
    num_steps_trained: 1407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1407,37171.8,1407000,-89.005,-21.5,-100,890.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1408000
  custom_metrics: {}
  date: 2021-10-22_06-05-32
  done: false
  episode_len_mean: 897.62
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -89.76199999999899
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4490
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.085295244307237e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.007524154175836076
          entropy_coeff: 0.009999999999999998
          kl: 0.0005796433551733299
          policy_loss: -0.019361718661255307
          total_loss: 0.3858101225561566
          vf_explained_var: 0.0002347138215554878
          vf_loss: 0.40524709326370306
    num_agent_steps_sampled: 1408000
    num_agent_steps_trained: 1408000
    num_steps_sampled: 1408000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1408,37180.6,1408000,-89.762,-21.5,-100,897.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1409000
  custom_metrics: {}
  date: 2021-10-22_06-05-41
  done: false
  episode_len_mean: 905.14
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -90.51399999999896
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4491
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5426476221536186e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.005371981963010816
          entropy_coeff: 0.009999999999999998
          kl: 9.770833750847894e-05
          policy_loss: -0.018021609965297912
          total_loss: 0.3858758182161384
          vf_explained_var: 0.0012334021739661694
          vf_loss: 0.4039511394108963
    num_agent_steps_sampled: 1409000
    num_agent_steps_trained: 1409000
    num_steps_sampled: 1409000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1409,37190.1,1409000,-90.514,-21.5,-100,905.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1410000
  custom_metrics: {}
  date: 2021-10-22_06-05-50
  done: false
  episode_len_mean: 912.8
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -91.27999999999895
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4492
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.713238110768093e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.012219422362330886
          entropy_coeff: 0.009999999999999998
          kl: 0.00047913310435015457
          policy_loss: -0.020867834447158708
          total_loss: 0.38381141502824095
          vf_explained_var: -4.845195235247957e-06
          vf_loss: 0.4048014435074745
    num_agent_steps_sampled: 1410000
    num_agent_steps_trained: 1410000
    num_steps_sampled: 1410000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1410,37199,1410000,-91.28,-21.5,-100,912.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1411000
  custom_metrics: {}
  date: 2021-10-22_06-05-59
  done: false
  episode_len_mean: 919.64
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -91.96399999999895
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4493
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8566190553840466e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.009647799159736476
          entropy_coeff: 0.009999999999999998
          kl: 0.0027031529091347995
          policy_loss: -0.05493003494209713
          total_loss: 0.960671766102314
          vf_explained_var: 0.0036433874629437923
          vf_loss: 1.0156982756278012
    num_agent_steps_sampled: 1411000
    num_agent_steps_trained: 1411000
    num_steps_sampled: 1411000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1411,37208.2,1411000,-91.964,-21.5,-100,919.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1412000
  custom_metrics: {}
  date: 2021-10-22_06-06-08
  done: false
  episode_len_mean: 927.49
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -92.74899999999893
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4494
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9283095276920233e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.038984351370244336
          entropy_coeff: 0.009999999999999998
          kl: 0.04298474286570201
          policy_loss: -0.06615753852658801
          total_loss: 0.888331859641605
          vf_explained_var: 0.009474189020693302
          vf_loss: 0.954879237858889
    num_agent_steps_sampled: 1412000
    num_agent_steps_trained: 1412000
    num_steps_sampled: 1412000
    num_steps_trained: 141200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1412,37217.3,1412000,-92.749,-21.8,-100,927.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1413000
  custom_metrics: {}
  date: 2021-10-22_06-06-17
  done: false
  episode_len_mean: 935.31
  episode_media: {}
  episode_reward_max: -48.40000000000042
  episode_reward_mean: -93.53099999999891
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4495
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8924642915380354e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.016032218918391867
          entropy_coeff: 0.009999999999999998
          kl: 0.0022046214617694798
          policy_loss: -0.055018408762084116
          total_loss: 0.8757661042941941
          vf_explained_var: 0.007128206081688404
          vf_loss: 0.9309448441594012
    num_agent_steps_sampled: 1413000
    num_agent_steps_trained: 1413000
    num_steps_sampled: 1413000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1413,37226.4,1413000,-93.531,-48.4,-100,935.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1414000
  custom_metrics: {}
  date: 2021-10-22_06-06-27
  done: false
  episode_len_mean: 940.47
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.0469999999989
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4496
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4462321457690177e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.008861625591008407
          entropy_coeff: 0.009999999999999998
          kl: 0.0007067397794265536
          policy_loss: -0.05461302548646927
          total_loss: 0.8646796507967843
          vf_explained_var: 0.0053720599971711636
          vf_loss: 0.9193812947202887
    num_agent_steps_sampled: 1414000
    num_agent_steps_trained: 1414000
    num_steps_sampled: 1414000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1414,37235.8,1414000,-94.047,-50.4,-100,940.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1415000
  custom_metrics: {}
  date: 2021-10-22_06-06-36
  done: false
  episode_len_mean: 943.27
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.32699999999888
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4497
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.231160728845088e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.012871672870500738
          entropy_coeff: 0.009999999999999998
          kl: 0.0019651175551138895
          policy_loss: -0.054240538014305964
          total_loss: 0.8436783778998587
          vf_explained_var: 0.00888300035148859
          vf_loss: 0.8980476322169934
    num_agent_steps_sampled: 1415000
    num_agent_steps_trained: 1415000
    num_steps_sampled: 1415000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1415,37244.9,1415000,-94.327,-50.4,-100,943.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1416000
  custom_metrics: {}
  date: 2021-10-22_06-06-46
  done: false
  episode_len_mean: 943.92
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.39199999999887
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4498
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.615580364422544e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.020835688686767957
          entropy_coeff: 0.009999999999999998
          kl: 0.0027729606635400164
          policy_loss: -0.053874555892414515
          total_loss: 0.8446612829963366
          vf_explained_var: 0.007645371835678816
          vf_loss: 0.8987441772802008
    num_agent_steps_sampled: 1416000
    num_agent_steps_trained: 1416000
    num_steps_sampled: 1416000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1416,37254.4,1416000,-94.392,-50.4,-100,943.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1417000
  custom_metrics: {}
  date: 2021-10-22_06-06-54
  done: false
  episode_len_mean: 944.48
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.44799999999888
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4499
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.807790182211272e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.014082960738839271
          entropy_coeff: 0.009999999999999998
          kl: 0.004087346045528937
          policy_loss: -0.055817867484357624
          total_loss: 0.8078606453206804
          vf_explained_var: 0.012661581858992577
          vf_loss: 0.8638193636919217
    num_agent_steps_sampled: 1417000
    num_agent_steps_trained: 1417000
    num_steps_sampled: 1417000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1417,37263.3,1417000,-94.448,-50.4,-100,944.48




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1418000
  custom_metrics: {}
  date: 2021-10-22_06-07-20
  done: false
  episode_len_mean: 945.15
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.51499999999888
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4500
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.03895091105636e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.01847455642806987
          entropy_coeff: 0.009999999999999998
          kl: 0.0024288119019153856
          policy_loss: -0.052309921466641957
          total_loss: 0.8344393907321824
          vf_explained_var: 0.002438016701489687
          vf_loss: 0.8869340551189251
    num_agent_steps_sampled: 1418000
    num_agent_steps_trained: 1418000
    num_steps_sampled: 1418000
    num_steps_trained: 141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1418,37289.2,1418000,-94.515,-50.4,-100,945.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1419000
  custom_metrics: {}
  date: 2021-10-22_06-07-31
  done: false
  episode_len_mean: 949.96
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -94.99599999999886
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4501
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.51947545552818e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.009140756175232431
          entropy_coeff: 0.009999999999999998
          kl: 0.0004892318304505316
          policy_loss: -0.05174484567509757
          total_loss: 0.8293059105674426
          vf_explained_var: 0.0035047312267124653
          vf_loss: 0.8811421491619613
    num_agent_steps_sampled: 1419000
    num_agent_steps_trained: 1419000
    num_steps_sampled: 1419000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1419,37299.6,1419000,-94.996,-50.4,-100,949.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1420000
  custom_metrics: {}
  date: 2021-10-22_06-07-40
  done: false
  episode_len_mean: 954.15
  episode_media: {}
  episode_reward_max: -50.400000000000446
  episode_reward_mean: -95.41499999999884
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4502
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.25973772776409e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.01276592909772363
          entropy_coeff: 0.009999999999999998
          kl: 0.0003604669996408245
          policy_loss: -0.05259347177214092
          total_loss: 0.8323193455735842
          vf_explained_var: 0.0012577434536069632
          vf_loss: 0.8850404559738106
    num_agent_steps_sampled: 1420000
    num_agent_steps_trained: 1420000
    num_steps_sampled: 1420000
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1420,37309.1,1420000,-95.415,-50.4,-100,954.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1421000
  custom_metrics: {}
  date: 2021-10-22_06-07-49
  done: false
  episode_len_mean: 959.11
  episode_media: {}
  episode_reward_max: -50.50000000000045
  episode_reward_mean: -95.91099999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4503
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.129868863882045e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.01879776966654592
          entropy_coeff: 0.009999999999999998
          kl: 0.0005652425195978911
          policy_loss: -0.05377723558081521
          total_loss: 0.8235675828324424
          vf_explained_var: 0.0003587742685340345
          vf_loss: 0.8775327901459402
    num_agent_steps_sampled: 1421000
    num_agent_steps_trained: 1421000
    num_steps_sampled: 1421000
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1421,37318.1,1421000,-95.911,-50.5,-100,959.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1422000
  custom_metrics: {}
  date: 2021-10-22_06-07-59
  done: false
  episode_len_mean: 964.06
  episode_media: {}
  episode_reward_max: -61.600000000000605
  episode_reward_mean: -96.40599999999878
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4504
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.649344319410225e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.01613507072130839
          entropy_coeff: 0.009999999999999998
          kl: 0.000391191469984431
          policy_loss: -0.052685773869355516
          total_loss: 0.8186480245656438
          vf_explained_var: 0.0016660153632983565
          vf_loss: 0.8714951547483604
    num_agent_steps_sampled: 1422000
    num_agent_steps_trained: 1422000
    num_steps_sampled: 1422000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1422,37327.5,1422000,-96.406,-61.6,-100,964.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1423000
  custom_metrics: {}
  date: 2021-10-22_06-08-08
  done: false
  episode_len_mean: 967.79
  episode_media: {}
  episode_reward_max: -61.600000000000605
  episode_reward_mean: -96.77899999999877
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4505
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8246721597051126e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.011639137467783358
          entropy_coeff: 0.009999999999999998
          kl: 0.0011164535455658183
          policy_loss: -0.05222838736242718
          total_loss: 0.8109060653381878
          vf_explained_var: 0.0029838376212865114
          vf_loss: 0.863250840206941
    num_agent_steps_sampled: 1423000
    num_agent_steps_trained: 1423000
    num_steps_sampled: 1423000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1423,37336.6,1423000,-96.779,-61.6,-100,967.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1424000
  custom_metrics: {}
  date: 2021-10-22_06-08-17
  done: false
  episode_len_mean: 971.63
  episode_media: {}
  episode_reward_max: -70.70000000000026
  episode_reward_mean: -97.16299999999876
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4506
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4123360798525563e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.016922062966558668
          entropy_coeff: 0.009999999999999998
          kl: 0.0005254556695817463
          policy_loss: -0.05272828348808818
          total_loss: 0.8165540503131019
          vf_explained_var: 0.0017981324344873428
          vf_loss: 0.8694515466483103
    num_agent_steps_sampled: 1424000
    num_agent_steps_trained: 1424000
    num_steps_sampled: 1424000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1424,37345.6,1424000,-97.163,-70.7,-100,971.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1425000
  custom_metrics: {}
  date: 2021-10-22_06-08-26
  done: false
  episode_len_mean: 974.56
  episode_media: {}
  episode_reward_max: -72.00000000000018
  episode_reward_mean: -97.45599999999875
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4507
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.061680399262782e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.026248124393168835
          entropy_coeff: 0.009999999999999998
          kl: 0.0016909657565420055
          policy_loss: -0.05601812824606896
          total_loss: 0.7883967141310374
          vf_explained_var: 0.004317662678658962
          vf_loss: 0.8446773424537646
    num_agent_steps_sampled: 1425000
    num_agent_steps_trained: 1425000
    num_steps_sampled: 1425000
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1425,37354.9,1425000,-97.456,-72,-100,974.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1426000
  custom_metrics: {}
  date: 2021-10-22_06-08-35
  done: false
  episode_len_mean: 976.68
  episode_media: {}
  episode_reward_max: -72.00000000000018
  episode_reward_mean: -97.66799999999871
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4508
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.530840199631391e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.2034416298938191
          entropy_coeff: 0.009999999999999998
          kl: 0.1764608814525403
          policy_loss: -0.059655395564105776
          total_loss: 0.770698552330335
          vf_explained_var: 0.009131328202784061
          vf_loss: 0.832388360446526
    num_agent_steps_sampled: 1426000
    num_agent_steps_trained: 1426000
    num_steps_sampled: 1426000
    num_steps_trained: 1426000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1426,37363.9,1426000,-97.668,-72,-100,976.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1427000
  custom_metrics: {}
  date: 2021-10-22_06-08-45
  done: false
  episode_len_mean: 978.46
  episode_media: {}
  episode_reward_max: -72.00000000000018
  episode_reward_mean: -97.84599999999871
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4509
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.296260299447087e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.11491983144595805
          entropy_coeff: 0.009999999999999998
          kl: 0.8097691628052105
          policy_loss: -0.002590957283973694
          total_loss: 0.8528005970848931
          vf_explained_var: -0.09415990859270096
          vf_loss: 0.8565407594174353
    num_agent_steps_sampled: 1427000
    num_agent_steps_trained: 1427000
    num_steps_sampled: 1427000
    num_steps_trained: 142700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1427,37373.3,1427000,-97.846,-72,-100,978.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1428000
  custom_metrics: {}
  date: 2021-10-22_06-08-54
  done: false
  episode_len_mean: 981.15
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.1149999999987
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4510
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.944390449170631e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.06182271022763517
          entropy_coeff: 0.009999999999999998
          kl: 0.2637647263123578
          policy_loss: -0.045446545713477664
          total_loss: 0.8027377092176013
          vf_explained_var: -0.30631113052368164
          vf_loss: 0.8488024705089628
    num_agent_steps_sampled: 1428000
    num_agent_steps_trained: 1428000
    num_steps_sampled: 1428000
    num_steps_trained: 1428000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1428,37382.4,1428000,-98.115,-84.7,-100,981.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1429000
  custom_metrics: {}
  date: 2021-10-22_06-09-03
  done: false
  episode_len_mean: 982.42
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.24199999999868
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4511
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1916585673755949e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.06610990450200108
          entropy_coeff: 0.009999999999999998
          kl: 0.0032009644940945393
          policy_loss: -0.05088789910078049
          total_loss: 0.78980044407977
          vf_explained_var: -0.17652954161167145
          vf_loss: 0.8413494499038077
    num_agent_steps_sampled: 1429000
    num_agent_steps_trained: 1429000
    num_steps_sampled: 1429000
    num_steps_trained: 14290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1429,37391.9,1429000,-98.242,-84.7,-100,982.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1430000
  custom_metrics: {}
  date: 2021-10-22_06-09-12
  done: false
  episode_len_mean: 982.84
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.28399999999868
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4512
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.958292836877974e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.026563746627006266
          entropy_coeff: 0.009999999999999998
          kl: 0.005196247814006084
          policy_loss: -0.05354037450419532
          total_loss: 0.7631813776161935
          vf_explained_var: 0.08149588853120804
          vf_loss: 0.8169873961868386
    num_agent_steps_sampled: 1430000
    num_agent_steps_trained: 1430000
    num_steps_sampled: 1430000
    num_steps_trained: 14300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1430,37400.7,1430000,-98.284,-84.7,-100,982.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1431000
  custom_metrics: {}
  date: 2021-10-22_06-09-22
  done: false
  episode_len_mean: 983.56
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.35599999999867
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4513
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.958292836877974e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.058448466257606115
          entropy_coeff: 0.009999999999999998
          kl: 0.006565293473362847
          policy_loss: -0.05203831742207209
          total_loss: 0.7705625252591239
          vf_explained_var: -0.10418178886175156
          vf_loss: 0.8231853305465645
    num_agent_steps_sampled: 1431000
    num_agent_steps_trained: 1431000
    num_steps_sampled: 1431000
    num_steps_trained: 1431

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1431,37410.3,1431000,-98.356,-84.7,-100,983.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1432000
  custom_metrics: {}
  date: 2021-10-22_06-09-31
  done: false
  episode_len_mean: 984.96
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.49599999999867
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4514
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.958292836877974e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.05225178885739297
          entropy_coeff: 0.009999999999999998
          kl: 0.07364080716676603
          policy_loss: -0.05026111238532596
          total_loss: 0.7860595320661863
          vf_explained_var: -0.1855592131614685
          vf_loss: 0.8368431489500735
    num_agent_steps_sampled: 1432000
    num_agent_steps_trained: 1432000
    num_steps_sampled: 1432000
    num_steps_trained: 1432000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1432,37419.1,1432000,-98.496,-84.7,-100,984.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1433000
  custom_metrics: {}
  date: 2021-10-22_06-09-40
  done: false
  episode_len_mean: 985.44
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.54399999999868
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4515
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.937439255316959e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.013442134119880695
          entropy_coeff: 0.009999999999999998
          kl: 0.00020126099039515172
          policy_loss: -0.05124271147780948
          total_loss: 0.7842309471633699
          vf_explained_var: -0.23215454816818237
          vf_loss: 0.835608071130183
    num_agent_steps_sampled: 1433000
    num_agent_steps_trained: 1433000
    num_steps_sampled: 1433000
    num_steps_trained: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1433,37428.4,1433000,-98.544,-84.7,-100,985.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1434000
  custom_metrics: {}
  date: 2021-10-22_06-09-49
  done: false
  episode_len_mean: 986.0
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.59999999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4516
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4687196276584797e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.02519339858683654
          entropy_coeff: 0.009999999999999998
          kl: 0.002002107392379816
          policy_loss: -0.05200043453110589
          total_loss: 0.780876295765241
          vf_explained_var: 0.07943741232156754
          vf_loss: 0.8331286503312488
    num_agent_steps_sampled: 1434000
    num_agent_steps_trained: 1434000
    num_steps_sampled: 1434000
    num_steps_trained: 1434000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1434,37437.5,1434000,-98.6,-84.7,-100,986


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1435000
  custom_metrics: {}
  date: 2021-10-22_06-09-58
  done: false
  episode_len_mean: 986.28
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.62799999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4517
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2343598138292398e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.009557299982083755
          entropy_coeff: 0.009999999999999998
          kl: 0.0025307553517911843
          policy_loss: -0.051810541914569005
          total_loss: 0.7928930176628961
          vf_explained_var: -0.3267555832862854
          vf_loss: 0.844799132241557
    num_agent_steps_sampled: 1435000
    num_agent_steps_trained: 1435000
    num_steps_sampled: 1435000
    num_steps_trained: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1435,37446.9,1435000,-98.628,-84.7,-100,986.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1436000
  custom_metrics: {}
  date: 2021-10-22_06-10-08
  done: false
  episode_len_mean: 986.84
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.68399999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4518
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1171799069146199e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.012811226864707552
          entropy_coeff: 0.009999999999999998
          kl: 0.0005731668043759241
          policy_loss: -0.05122211592064963
          total_loss: 0.7720099000467194
          vf_explained_var: -0.32980769872665405
          vf_loss: 0.8233601236457212
    num_agent_steps_sampled: 1436000
    num_agent_steps_trained: 1436000
    num_steps_sampled: 1436000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1436,37456.1,1436000,-98.684,-84.7,-100,986.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1437000
  custom_metrics: {}
  date: 2021-10-22_06-10-17
  done: false
  episode_len_mean: 987.04
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.70399999999867
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4519
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5858995345730996e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.045705462068629764
          entropy_coeff: 0.009999999999999998
          kl: 0.006593034498159699
          policy_loss: -0.055076316992441816
          total_loss: 0.8001096395982636
          vf_explained_var: 0.18556459248065948
          vf_loss: 0.8556430124018031
    num_agent_steps_sampled: 1437000
    num_agent_steps_trained: 1437000
    num_steps_sampled: 1437000
    num_steps_trained: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1437,37465.1,1437000,-98.704,-84.7,-100,987.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1438000
  custom_metrics: {}
  date: 2021-10-22_06-10-26
  done: false
  episode_len_mean: 987.6
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.75999999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4520
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5858995345730996e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.015915554917107024
          entropy_coeff: 0.009999999999999998
          kl: 0.002619764175916354
          policy_loss: -0.05154288394583596
          total_loss: 0.7916390783256955
          vf_explained_var: -0.15035380423069
          vf_loss: 0.8433411170987205
    num_agent_steps_sampled: 1438000
    num_agent_steps_trained: 1438000
    num_steps_sampled: 1438000
    num_steps_trained: 1438000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1438,37474.4,1438000,-98.76,-84.7,-100,987.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1439000
  custom_metrics: {}
  date: 2021-10-22_06-10-35
  done: false
  episode_len_mean: 988.12
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.81199999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4521
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7929497672865498e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.059581521034447686
          entropy_coeff: 0.009999999999999998
          kl: 0.007613871546656976
          policy_loss: -0.05235561794704861
          total_loss: 0.7868790556987126
          vf_explained_var: -0.3182380497455597
          vf_loss: 0.8398304973108073
    num_agent_steps_sampled: 1439000
    num_agent_steps_trained: 1439000
    num_steps_sampled: 1439000
    num_steps_trained: 1439

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1439,37483.4,1439000,-98.812,-84.7,-100,988.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1440000
  custom_metrics: {}
  date: 2021-10-22_06-10-44
  done: false
  episode_len_mean: 988.6
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.85999999999865
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4522
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7929497672865498e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.05309303477406502
          entropy_coeff: 0.009999999999999998
          kl: 0.0011904701790442513
          policy_loss: -0.05159642812278536
          total_loss: 0.795167642666234
          vf_explained_var: -0.26259154081344604
          vf_loss: 0.8472950107728442
    num_agent_steps_sampled: 1440000
    num_agent_steps_trained: 1440000
    num_steps_sampled: 1440000
    num_steps_trained: 14400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1440,37492.8,1440000,-98.86,-84.7,-100,988.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1441000
  custom_metrics: {}
  date: 2021-10-22_06-10-54
  done: false
  episode_len_mean: 989.68
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -98.96799999999865
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4523
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3964748836432749e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.036194871614376706
          entropy_coeff: 0.009999999999999998
          kl: 0.00268292796683293
          policy_loss: -0.051814407606919605
          total_loss: 0.8069512473212348
          vf_explained_var: -0.3319629728794098
          vf_loss: 0.8591276174411178
    num_agent_steps_sampled: 1441000
    num_agent_steps_trained: 1441000
    num_steps_sampled: 1441000
    num_steps_trained: 1441

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1441,37501.8,1441000,-98.968,-84.7,-100,989.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1442000
  custom_metrics: {}
  date: 2021-10-22_06-11-03
  done: false
  episode_len_mean: 990.2
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.01999999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4524
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.9823744182163745e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.01997108979978495
          entropy_coeff: 0.009999999999999998
          kl: 0.00013435658983331297
          policy_loss: -0.051349995533625285
          total_loss: 0.8049345504906442
          vf_explained_var: -0.3086606562137604
          vf_loss: 0.8564842669707206
    num_agent_steps_sampled: 1442000
    num_agent_steps_trained: 1442000
    num_steps_sampled: 1442000
    num_steps_trained: 144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1442,37511.6,1442000,-99.02,-84.7,-100,990.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1443000
  custom_metrics: {}
  date: 2021-10-22_06-11-12
  done: false
  episode_len_mean: 990.92
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.09199999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4525
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4911872091081873e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.016286902129650116
          entropy_coeff: 0.009999999999999998
          kl: 0.00011967814426107676
          policy_loss: -0.05145070602496465
          total_loss: 0.8051489624712203
          vf_explained_var: -0.3084220290184021
          vf_loss: 0.8567625571042299
    num_agent_steps_sampled: 1443000
    num_agent_steps_trained: 1443000
    num_steps_sampled: 1443000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1443,37520.5,1443000,-99.092,-84.7,-100,990.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1444000
  custom_metrics: {}
  date: 2021-10-22_06-11-22
  done: false
  episode_len_mean: 991.04
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10399999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4526
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7455936045540936e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.0170244043899907
          entropy_coeff: 0.009999999999999998
          kl: 7.393309983526485e-06
          policy_loss: -0.051094688309563534
          total_loss: 0.7969082741273774
          vf_explained_var: -0.28597062826156616
          vf_loss: 0.8481732056579656
    num_agent_steps_sampled: 1444000
    num_agent_steps_trained: 1444000
    num_steps_sampled: 1444000
    num_steps_trained: 144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1444,37529.9,1444000,-99.104,-84.7,-100,991.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1445000
  custom_metrics: {}
  date: 2021-10-22_06-11-30
  done: false
  episode_len_mean: 991.12
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.11199999999866
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4527
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.727968022770468e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.015271732583642006
          entropy_coeff: 0.009999999999999998
          kl: 9.802246606824086e-05
          policy_loss: -0.051186400486363304
          total_loss: 0.7936122814814249
          vf_explained_var: -0.28762298822402954
          vf_loss: 0.8449513924618562
    num_agent_steps_sampled: 1445000
    num_agent_steps_trained: 1445000
    num_steps_sampled: 1445000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1445,37538.7,1445000,-99.112,-84.7,-100,991.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1446000
  custom_metrics: {}
  date: 2021-10-22_06-11-40
  done: false
  episode_len_mean: 991.28
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.12799999999865
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4528
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.363984011385234e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.012268224731087685
          entropy_coeff: 0.009999999999999998
          kl: 0.00030312782293560046
          policy_loss: -0.05122384230295817
          total_loss: 0.798479286995199
          vf_explained_var: -0.30991336703300476
          vf_loss: 0.8498258120690783
    num_agent_steps_sampled: 1446000
    num_agent_steps_trained: 1446000
    num_steps_sampled: 1446000
    num_steps_trained: 144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1446,37548.4,1446000,-99.128,-84.7,-100,991.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1447000
  custom_metrics: {}
  date: 2021-10-22_06-11-49
  done: false
  episode_len_mean: 991.36
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.13599999999865
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4529
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.181992005692617e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.013163457790182697
          entropy_coeff: 0.009999999999999998
          kl: 0.0001520239884838971
          policy_loss: -0.050923227932718064
          total_loss: 0.7917399537232187
          vf_explained_var: -0.26857370138168335
          vf_loss: 0.8427948205421368
    num_agent_steps_sampled: 1447000
    num_agent_steps_trained: 1447000
    num_steps_sampled: 1447000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1447,37557.2,1447000,-99.136,-84.7,-100,991.36




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1448000
  custom_metrics: {}
  date: 2021-10-22_06-12-15
  done: false
  episode_len_mean: 990.25
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.02499999999864
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4530
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0909960028463085e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.006155882117390219
          entropy_coeff: 0.009999999999999998
          kl: 0.0005746930870412671
          policy_loss: -0.05330205708742142
          total_loss: 0.7623505738046434
          vf_explained_var: 0.011716877110302448
          vf_loss: 0.8157141998306745
    num_agent_steps_sampled: 1448000
    num_agent_steps_trained: 1448000
    num_steps_sampled: 1448000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1448,37583.1,1448000,-99.025,-84.7,-100,990.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1449000
  custom_metrics: {}
  date: 2021-10-22_06-12-24
  done: false
  episode_len_mean: 991.01
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10099999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4531
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.4549800142315426e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.007398859939227501
          entropy_coeff: 0.009999999999999998
          kl: 0.005084398216087263
          policy_loss: -0.05256232420603434
          total_loss: 0.805619709359275
          vf_explained_var: 0.002390401205047965
          vf_loss: 0.8582560202624235
    num_agent_steps_sampled: 1449000
    num_agent_steps_trained: 1449000
    num_steps_sampled: 1449000
    num_steps_trained: 1449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1449,37592.5,1449000,-99.101,-84.7,-100,991.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1450000
  custom_metrics: {}
  date: 2021-10-22_06-12-33
  done: false
  episode_len_mean: 991.01
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10099999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4532
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.4549800142315426e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.023387704108447198
          entropy_coeff: 0.009999999999999998
          kl: 0.038885261977810066
          policy_loss: -0.05396351549360487
          total_loss: 0.6950560950570637
          vf_explained_var: 0.03276605159044266
          vf_loss: 0.7492534794146195
    num_agent_steps_sampled: 1450000
    num_agent_steps_trained: 1450000
    num_steps_sampled: 1450000
    num_steps_trained: 1450

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1450,37601.5,1450000,-99.101,-84.7,-100,991.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1451000
  custom_metrics: {}
  date: 2021-10-22_06-12-43
  done: false
  episode_len_mean: 991.09
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4533
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.182470021347313e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.014924959467154823
          entropy_coeff: 0.009999999999999998
          kl: 0.0027575253258959485
          policy_loss: -0.05965075459745195
          total_loss: 0.6587077306376563
          vf_explained_var: -0.005202873609960079
          vf_loss: 0.7185077325596164
    num_agent_steps_sampled: 1451000
    num_agent_steps_trained: 1451000
    num_steps_sampled: 1451000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1451,37611.1,1451000,-99.109,-84.7,-100,991.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1452000
  custom_metrics: {}
  date: 2021-10-22_06-12-52
  done: false
  episode_len_mean: 991.09
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4534
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0912350106736566e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.017717236932367087
          entropy_coeff: 0.009999999999999998
          kl: 0.0017185950887301299
          policy_loss: -0.052282133367326525
          total_loss: 0.7032887145876885
          vf_explained_var: 0.0033165363129228354
          vf_loss: 0.7557480238791969
    num_agent_steps_sampled: 1452000
    num_agent_steps_trained: 1452000
    num_steps_sampled: 1452000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1452,37620,1452000,-99.109,-84.7,-100,991.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1453000
  custom_metrics: {}
  date: 2021-10-22_06-13-01
  done: false
  episode_len_mean: 991.09
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4535
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0456175053368283e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.010236664644132058
          entropy_coeff: 0.009999999999999998
          kl: 4.2588571019436867e-05
          policy_loss: -0.051410412126117286
          total_loss: 0.7148481286234326
          vf_explained_var: 0.0027493780944496393
          vf_loss: 0.7663608921277855
    num_agent_steps_sampled: 1453000
    num_agent_steps_trained: 1453000
    num_steps_sampled: 1453000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1453,37629.4,1453000,-99.109,-84.7,-100,991.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1454000
  custom_metrics: {}
  date: 2021-10-22_06-13-10
  done: false
  episode_len_mean: 991.09
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.10899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4536
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0228087526684142e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.006770906121366554
          entropy_coeff: 0.009999999999999998
          kl: 6.167282857118354e-05
          policy_loss: -0.051742767956521774
          total_loss: 0.7339714349971878
          vf_explained_var: 0.002630081493407488
          vf_loss: 0.7857819071246518
    num_agent_steps_sampled: 1454000
    num_agent_steps_trained: 1454000
    num_steps_sampled: 1454000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1454,37638.1,1454000,-99.109,-84.7,-100,991.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1455000
  custom_metrics: {}
  date: 2021-10-22_06-13-20
  done: false
  episode_len_mean: 992.28
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.22799999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4537
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.114043763342071e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.005287912550071875
          entropy_coeff: 0.009999999999999998
          kl: 2.3265803222970327e-06
          policy_loss: -0.05197433763080173
          total_loss: 0.7473949339654711
          vf_explained_var: 0.0026648654602468014
          vf_loss: 0.7994221512435211
    num_agent_steps_sampled: 1455000
    num_agent_steps_trained: 1455000
    num_steps_sampled: 1455000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1455,37647.6,1455000,-99.228,-84.7,-100,992.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1456000
  custom_metrics: {}
  date: 2021-10-22_06-13-28
  done: false
  episode_len_mean: 993.59
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.35899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4538
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5570218816710354e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.00434152212821775
          entropy_coeff: 0.009999999999999998
          kl: 1.1658237215373462e-05
          policy_loss: -0.05216428389151891
          total_loss: 0.7536125373509195
          vf_explained_var: 0.0027016990352422
          vf_loss: 0.8058202284491724
    num_agent_steps_sampled: 1456000
    num_agent_steps_trained: 1456000
    num_steps_sampled: 1456000
    num_steps_trained: 1456

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1456,37656.3,1456000,-99.359,-84.7,-100,993.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1457000
  custom_metrics: {}
  date: 2021-10-22_06-13-38
  done: false
  episode_len_mean: 993.94
  episode_media: {}
  episode_reward_max: -84.69999999999946
  episode_reward_mean: -99.39399999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4539
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2785109408355177e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.0043775354149854845
          entropy_coeff: 0.009999999999999998
          kl: 7.526865689907088e-07
          policy_loss: -0.052208943996164534
          total_loss: 0.7552649802631802
          vf_explained_var: 0.0027768751606345177
          vf_loss: 0.8075176843338543
    num_agent_steps_sampled: 1457000
    num_agent_steps_trained: 1457000
    num_steps_sampled: 1457000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1457,37665.7,1457000,-99.394,-84.7,-100,993.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1458000
  custom_metrics: {}
  date: 2021-10-22_06-13-47
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4540
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.3925547041775885e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.004807083970970578
          entropy_coeff: 0.009999999999999998
          kl: 8.98295014750694e-07
          policy_loss: -0.05221733119752672
          total_loss: 0.7546380655633078
          vf_explained_var: 0.0028274378273636103
          vf_loss: 0.8069034600837364
    num_agent_steps_sampled: 1458000
    num_agent_steps_trained: 1458000
    num_steps_sampled: 1458000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1458,37674.7,1458000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1459000
  custom_metrics: {}
  date: 2021-10-22_06-13-56
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4541
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1962773520887942e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.004921933294584354
          entropy_coeff: 0.009999999999999998
          kl: 5.280187193010574e-07
          policy_loss: -0.052492544882827334
          total_loss: 0.7661280357175403
          vf_explained_var: 0.0028574983589351177
          vf_loss: 0.8186698026541207
    num_agent_steps_sampled: 1459000
    num_agent_steps_trained: 1459000
    num_steps_sampled: 1459000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1459,37684.1,1459000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1460000
  custom_metrics: {}
  date: 2021-10-22_06-14-05
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4542
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5981386760443971e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.005360142964248856
          entropy_coeff: 0.009999999999999998
          kl: 1.3159449754280356e-06
          policy_loss: -0.05248132265276379
          total_loss: 0.7641817293233342
          vf_explained_var: 0.002879199106246233
          vf_loss: 0.8167166559646527
    num_agent_steps_sampled: 1460000
    num_agent_steps_trained: 1460000
    num_steps_sampled: 1460000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1460,37693.1,1460000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1461000
  custom_metrics: {}
  date: 2021-10-22_06-14-14
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4543
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.990693380221986e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.00578800229769614
          entropy_coeff: 0.009999999999999998
          kl: 1.308321591614931e-06
          policy_loss: -0.052549437516265445
          total_loss: 0.7677399790949292
          vf_explained_var: 0.0029026323463767767
          vf_loss: 0.8203473111407624
    num_agent_steps_sampled: 1461000
    num_agent_steps_trained: 1461000
    num_steps_sampled: 1461000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1461,37702.2,1461000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1462000
  custom_metrics: {}
  date: 2021-10-22_06-14-24
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4544
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.995346690110993e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.008920032520674998
          entropy_coeff: 0.009999999999999998
          kl: 0.00040937680635297347
          policy_loss: -0.052860103713141544
          total_loss: 0.7789500059352981
          vf_explained_var: 0.0030654165893793106
          vf_loss: 0.8318992880276508
    num_agent_steps_sampled: 1462000
    num_agent_steps_trained: 1462000
    num_steps_sampled: 1462000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1462,37711.4,1462000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1463000
  custom_metrics: {}
  date: 2021-10-22_06-14-33
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4545
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9976733450554964e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.00684661571867764
          entropy_coeff: 0.009999999999999998
          kl: 6.900136425069567e-05
          policy_loss: -0.05307788964774873
          total_loss: 0.7872736200690269
          vf_explained_var: 0.0030451887287199497
          vf_loss: 0.8404199822702342
    num_agent_steps_sampled: 1463000
    num_agent_steps_trained: 1463000
    num_steps_sampled: 1463000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1463,37720.4,1463000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1464000
  custom_metrics: {}
  date: 2021-10-22_06-14-42
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4546
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.988366725277482e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.005714751267805696
          entropy_coeff: 0.009999999999999998
          kl: 7.575108250541653e-06
          policy_loss: -0.05297087960773044
          total_loss: 0.781698561542564
          vf_explained_var: 0.0029951254837214947
          vf_loss: 0.8347265927328004
    num_agent_steps_sampled: 1464000
    num_agent_steps_trained: 1464000
    num_steps_sampled: 1464000
    num_steps_trained: 146

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1464,37729.8,1464000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1465000
  custom_metrics: {}
  date: 2021-10-22_06-14-51
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4547
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.994183362638741e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0059849084510157505
          entropy_coeff: 0.009999999999999998
          kl: 2.879794260754773e-06
          policy_loss: -0.053079870177639855
          total_loss: 0.7902683196796312
          vf_explained_var: 0.002967393957078457
          vf_loss: 0.8434080343900455
    num_agent_steps_sampled: 1465000
    num_agent_steps_trained: 1465000
    num_steps_sampled: 1465000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1465,37738.8,1465000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1466000
  custom_metrics: {}
  date: 2021-10-22_06-15-01
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4548
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4970916813193705e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.006776392123558455
          entropy_coeff: 0.009999999999999998
          kl: 6.667865240276664e-06
          policy_loss: -0.05315706448422538
          total_loss: 0.7946333891815609
          vf_explained_var: 0.0029623357113450766
          vf_loss: 0.847858216220306
    num_agent_steps_sampled: 1466000
    num_agent_steps_trained: 1466000
    num_steps_sampled: 1466000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1466,37748.4,1466000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1467000
  custom_metrics: {}
  date: 2021-10-22_06-15-09
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4549
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2485458406596853e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.00813002021362384
          entropy_coeff: 0.009999999999999998
          kl: 3.6450520751691368e-06
          policy_loss: -0.053050596515337625
          total_loss: 0.7887861892580986
          vf_explained_var: 0.002959689823910594
          vf_loss: 0.8419180861363809
    num_agent_steps_sampled: 1467000
    num_agent_steps_trained: 1467000
    num_steps_sampled: 1467000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1467,37757.2,1467000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1468000
  custom_metrics: {}
  date: 2021-10-22_06-15-19
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4550
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.242729203298426e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.008998402094261514
          entropy_coeff: 0.009999999999999998
          kl: 7.50799803470222e-06
          policy_loss: -0.05314694692691167
          total_loss: 0.7940467097693019
          vf_explained_var: 0.0029490001033991575
          vf_loss: 0.8472836291210519
    num_agent_steps_sampled: 1468000
    num_agent_steps_trained: 1468000
    num_steps_sampled: 1468000
    num_steps_trained: 146

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1468,37766.8,1468000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1469000
  custom_metrics: {}
  date: 2021-10-22_06-15-28
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4551
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.121364601649213e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.010693373086137905
          entropy_coeff: 0.009999999999999998
          kl: 1.0056074133500835e-05
          policy_loss: -0.053163185715675354
          total_loss: 0.7955880456500584
          vf_explained_var: 0.0029446741100400686
          vf_loss: 0.8488581758613388
    num_agent_steps_sampled: 1469000
    num_agent_steps_trained: 1469000
    num_steps_sampled: 1469000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1469,37775.6,1469000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1470000
  custom_metrics: {}
  date: 2021-10-22_06-15-37
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4552
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5606823008246066e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.009705249380527272
          entropy_coeff: 0.009999999999999998
          kl: 2.2177187516640254e-05
          policy_loss: -0.053393379516071746
          total_loss: 0.8034085437655449
          vf_explained_var: 0.0029471456073224545
          vf_loss: 0.8568989736752377
    num_agent_steps_sampled: 1470000
    num_agent_steps_trained: 1470000
    num_steps_sampled: 1470000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1470,37785.2,1470000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1471000
  custom_metrics: {}
  date: 2021-10-22_06-15-46
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4553
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.803411504123033e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.008396877420859204
          entropy_coeff: 0.009999999999999998
          kl: 1.37700471761093e-05
          policy_loss: -0.05343009548054801
          total_loss: 0.8046178660458989
          vf_explained_var: 0.0029578208923339844
          vf_loss: 0.8581319333157605
    num_agent_steps_sampled: 1471000
    num_agent_steps_trained: 1471000
    num_steps_sampled: 1471000
    num_steps_trained: 147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1471,37794,1471000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1472000
  custom_metrics: {}
  date: 2021-10-22_06-15-56
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4554
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9017057520615164e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.00955005262254013
          entropy_coeff: 0.009999999999999998
          kl: 7.226462812360341e-06
          policy_loss: -0.05319880296786626
          total_loss: 0.7954358375734754
          vf_explained_var: 0.0029519139789044857
          vf_loss: 0.8487301264165177
    num_agent_steps_sampled: 1472000
    num_agent_steps_trained: 1472000
    num_steps_sampled: 1472000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1472,37803.5,1472000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1473000
  custom_metrics: {}
  date: 2021-10-22_06-16-05
  done: false
  episode_len_mean: 995.47
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.54699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4555
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9508528760307582e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.008529726700443361
          entropy_coeff: 0.009999999999999998
          kl: 5.428483483761336e-05
          policy_loss: -0.053296972480085164
          total_loss: 0.7947971628771888
          vf_explained_var: 0.0029560155235230923
          vf_loss: 0.8481794290658501
    num_agent_steps_sampled: 1473000
    num_agent_steps_trained: 1473000
    num_steps_sampled: 1473000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1473,37812.4,1473000,-99.547,-85.4,-100,995.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1474000
  custom_metrics: {}
  date: 2021-10-22_06-16-14
  done: false
  episode_len_mean: 995.51
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.55099999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4556
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.754264380153791e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.010688092833798792
          entropy_coeff: 0.009999999999999998
          kl: 0.00015629218497269503
          policy_loss: -0.053379487494627634
          total_loss: 0.7991305670804447
          vf_explained_var: 0.0031386574264615774
          vf_loss: 0.8526169403352671
    num_agent_steps_sampled: 1474000
    num_agent_steps_trained: 1474000
    num_steps_sampled: 1474000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1474,37821.5,1474000,-99.551,-85.4,-100,995.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1475000
  custom_metrics: {}
  date: 2021-10-22_06-16-23
  done: false
  episode_len_mean: 995.59
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.55899999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4557
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.8771321900768955e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.008124242562593685
          entropy_coeff: 0.009999999999999998
          kl: 7.02334256747545e-06
          policy_loss: -0.05329600208335453
          total_loss: 0.7995781570672988
          vf_explained_var: 0.002995733404532075
          vf_loss: 0.8529553961629669
    num_agent_steps_sampled: 1475000
    num_agent_steps_trained: 1475000
    num_steps_sampled: 1475000
    num_steps_trained: 147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1475,37830.6,1475000,-99.559,-85.4,-100,995.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1476000
  custom_metrics: {}
  date: 2021-10-22_06-16-32
  done: false
  episode_len_mean: 995.59
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.55899999999863
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4558
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4385660950384478e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.01015790816810396
          entropy_coeff: 0.009999999999999998
          kl: 1.706769548575071e-05
          policy_loss: -0.05325441492928399
          total_loss: 0.7984834002123938
          vf_explained_var: 0.0029660919681191444
          vf_loss: 0.8518393910386496
    num_agent_steps_sampled: 1476000
    num_agent_steps_trained: 1476000
    num_steps_sampled: 1476000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1476,37839.6,1476000,-99.559,-85.4,-100,995.59




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1477000
  custom_metrics: {}
  date: 2021-10-22_06-16-58
  done: false
  episode_len_mean: 994.17
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.41699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4560
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2192830475192239e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0456343631260097
          entropy_coeff: 0.009999999999999998
          kl: 0.01109344810516249
          policy_loss: 0.09659861938820945
          total_loss: 0.9446870293882158
          vf_explained_var: 0.0030627469532191753
          vf_loss: 0.8485447786541449
    num_agent_steps_sampled: 1477000
    num_agent_steps_trained: 1477000
    num_steps_sampled: 1477000
    num_steps_trained: 147700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1477,37865.6,1477000,-99.417,-85.4,-100,994.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1478000
  custom_metrics: {}
  date: 2021-10-22_06-17-09
  done: false
  episode_len_mean: 994.17
  episode_media: {}
  episode_reward_max: -85.39999999999942
  episode_reward_mean: -99.41699999999862
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4561
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2192830475192239e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.754399281905757
          entropy_coeff: 0.009999999999999998
          kl: 0.18361374688934143
          policy_loss: 0.17880929112434388
          total_loss: 0.1752735076679124
          vf_explained_var: 0.08933503180742264
          vf_loss: 0.0040082088180093305
    num_agent_steps_sampled: 1478000
    num_agent_steps_trained: 1478000
    num_steps_sampled: 1478000
    num_steps_trained: 147800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1478,37876.2,1478000,-99.417,-85.4,-100,994.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1479000
  custom_metrics: {}
  date: 2021-10-22_06-17-24
  done: false
  episode_len_mean: 989.25
  episode_media: {}
  episode_reward_max: -50.80000000000045
  episode_reward_mean: -98.92499999999868
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4562
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8289245712788354e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.9285925620132023
          entropy_coeff: 0.009999999999999998
          kl: 0.08873862246794538
          policy_loss: -0.0333350314034356
          total_loss: 0.786040122807026
          vf_explained_var: -0.46647965908050537
          vf_loss: 0.8286610882354176
    num_agent_steps_sampled: 1479000
    num_agent_steps_trained: 1479000
    num_steps_sampled: 1479000
    num_steps_trained: 1479000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1479,37891.4,1479000,-98.925,-50.8,-100,989.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1480000
  custom_metrics: {}
  date: 2021-10-22_06-17-43
  done: false
  episode_len_mean: 980.69
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -98.06899999999871
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4564
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7433868569182532e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.0932299143738218
          entropy_coeff: 0.009999999999999998
          kl: 0.09828053180475914
          policy_loss: -0.049867290258407596
          total_loss: 1.2914601580964193
          vf_explained_var: 0.10224444419145584
          vf_loss: 1.3522597473114728
    num_agent_steps_sampled: 1480000
    num_agent_steps_trained: 1480000
    num_steps_sampled: 1480000
    num_steps_trained: 14800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1480,37910.8,1480000,-98.069,-41.2,-100,980.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1481000
  custom_metrics: {}
  date: 2021-10-22_06-17-57
  done: false
  episode_len_mean: 977.81
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.78099999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4565
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.11508028537738e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7408890224165386
          entropy_coeff: 0.009999999999999998
          kl: 0.24966701528804447
          policy_loss: -0.024298618692490788
          total_loss: 0.8862916827201843
          vf_explained_var: -0.18891023099422455
          vf_loss: 0.9179991819378402
    num_agent_steps_sampled: 1481000
    num_agent_steps_trained: 1481000
    num_steps_sampled: 1481000
    num_steps_trained: 148100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1481,37924.9,1481000,-97.781,-41.2,-100,977.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1482000
  custom_metrics: {}
  date: 2021-10-22_06-18-09
  done: false
  episode_len_mean: 975.97
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.59699999999876
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4566
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.172620428066071e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.6689906345473395
          entropy_coeff: 0.009999999999999998
          kl: 0.023207783162697698
          policy_loss: -0.06357160872883266
          total_loss: 0.5850782371229596
          vf_explained_var: 0.03967297077178955
          vf_loss: 0.655339750519488
    num_agent_steps_sampled: 1482000
    num_agent_steps_trained: 1482000
    num_steps_sampled: 1482000
    num_steps_trained: 1482000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1482,37936.6,1482000,-97.597,-41.2,-100,975.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1483000
  custom_metrics: {}
  date: 2021-10-22_06-18-21
  done: false
  episode_len_mean: 975.77
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.57699999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4567
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.258930642099106e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.5506368408600489
          entropy_coeff: 0.009999999999999998
          kl: 0.0418127260670337
          policy_loss: -0.06393475648429658
          total_loss: 0.6605921119451523
          vf_explained_var: -0.044755131006240845
          vf_loss: 0.7300332290430863
    num_agent_steps_sampled: 1483000
    num_agent_steps_trained: 1483000
    num_steps_sampled: 1483000
    num_steps_trained: 148300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1483,37948.8,1483000,-97.577,-41.2,-100,975.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1484000
  custom_metrics: {}
  date: 2021-10-22_06-18-33
  done: false
  episode_len_mean: 975.69
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.56899999999874
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4568
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3888395963148662e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.3478819060656759
          entropy_coeff: 0.009999999999999998
          kl: 0.09683763252356524
          policy_loss: -0.07029530803362528
          total_loss: 0.5807226224078073
          vf_explained_var: -0.19732479751110077
          vf_loss: 0.6544967372798257
    num_agent_steps_sampled: 1484000
    num_agent_steps_trained: 1484000
    num_steps_sampled: 1484000
    num_steps_trained: 14840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1484,37960.1,1484000,-97.569,-41.2,-100,975.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1485000
  custom_metrics: {}
  date: 2021-10-22_06-18-43
  done: false
  episode_len_mean: 975.57
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.55699999999874
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4569
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0832593944722985e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.6354735407564375
          entropy_coeff: 0.009999999999999998
          kl: 0.04358876201143573
          policy_loss: -0.05450026359823015
          total_loss: 0.5581438158949216
          vf_explained_var: 0.035566654056310654
          vf_loss: 0.6189988119450087
    num_agent_steps_sampled: 1485000
    num_agent_steps_trained: 1485000
    num_steps_sampled: 1485000
    num_steps_trained: 14850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1485,37970.7,1485000,-97.557,-41.2,-100,975.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1486000
  custom_metrics: {}
  date: 2021-10-22_06-18-55
  done: false
  episode_len_mean: 974.83
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.48299999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4571
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.124889091708449e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7613971014817555
          entropy_coeff: 0.009999999999999998
          kl: 0.007569183781280091
          policy_loss: 0.13699192644821273
          total_loss: 0.726822788019975
          vf_explained_var: 0.019471846520900726
          vf_loss: 0.5974448407896691
    num_agent_steps_sampled: 1486000
    num_agent_steps_trained: 1486000
    num_steps_sampled: 1486000
    num_steps_trained: 1486000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1486,37981.9,1486000,-97.483,-41.2,-100,974.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1487000
  custom_metrics: {}
  date: 2021-10-22_06-19-05
  done: false
  episode_len_mean: 973.79
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.37899999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4572
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.124889091708449e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8886201891634199
          entropy_coeff: 0.009999999999999998
          kl: 0.011304430554892204
          policy_loss: 0.10325981072253651
          total_loss: 0.22440030674139658
          vf_explained_var: -0.00039726230897940695
          vf_loss: 0.1300267001006028
    num_agent_steps_sampled: 1487000
    num_agent_steps_trained: 1487000
    num_steps_sampled: 1487000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1487,37991.9,1487000,-97.379,-41.2,-100,973.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1488000
  custom_metrics: {}
  date: 2021-10-22_06-19-16
  done: false
  episode_len_mean: 972.37
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.23699999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4573
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.124889091708449e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.2189856217967139
          entropy_coeff: 0.009999999999999998
          kl: 0.021260705182834905
          policy_loss: -0.052491822838783266
          total_loss: 0.7653489180737072
          vf_explained_var: 0.020640911534428596
          vf_loss: 0.8300305870465107
    num_agent_steps_sampled: 1488000
    num_agent_steps_trained: 1488000
    num_steps_sampled: 1488000
    num_steps_trained: 1488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1488,38003.3,1488000,-97.237,-41.2,-100,972.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1489000
  custom_metrics: {}
  date: 2021-10-22_06-19-27
  done: false
  episode_len_mean: 970.62
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -97.06199999999873
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4574
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6873336375626733e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.0365162167284223
          entropy_coeff: 0.009999999999999998
          kl: 0.06330927412373942
          policy_loss: -0.06055529647403293
          total_loss: 0.7684337599409952
          vf_explained_var: 0.01184302568435669
          vf_loss: 0.8393542127062877
    num_agent_steps_sampled: 1489000
    num_agent_steps_trained: 1489000
    num_steps_sampled: 1489000
    num_steps_trained: 148900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1489,38014.4,1489000,-97.062,-41.2,-100,970.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1490000
  custom_metrics: {}
  date: 2021-10-22_06-19-38
  done: false
  episode_len_mean: 968.76
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.87599999999877
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4575
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.031000456344008e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8340527812639872
          entropy_coeff: 0.009999999999999998
          kl: 0.018048070426703255
          policy_loss: -0.06454567681584093
          total_loss: 0.7674393879042731
          vf_explained_var: 0.026719363406300545
          vf_loss: 0.8403256007366711
    num_agent_steps_sampled: 1490000
    num_agent_steps_trained: 1490000
    num_steps_sampled: 1490000
    num_steps_trained: 14900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1490,38025.3,1490000,-96.876,-41.2,-100,968.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1491000
  custom_metrics: {}
  date: 2021-10-22_06-19-48
  done: false
  episode_len_mean: 967.68
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.76799999999875
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4576
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.031000456344008e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7994054251246983
          entropy_coeff: 0.009999999999999998
          kl: 0.003868367453662813
          policy_loss: -0.056091054280598955
          total_loss: 0.7739829142888387
          vf_explained_var: 0.021482236683368683
          vf_loss: 0.8380680374387238
    num_agent_steps_sampled: 1491000
    num_agent_steps_trained: 1491000
    num_steps_sampled: 1491000
    num_steps_trained: 1491

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1491,38035.4,1491000,-96.768,-41.2,-100,967.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1492000
  custom_metrics: {}
  date: 2021-10-22_06-19-59
  done: false
  episode_len_mean: 965.63
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.56299999999878
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4578
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.515500228172004e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7852918578518762
          entropy_coeff: 0.009999999999999998
          kl: 0.003485835290820593
          policy_loss: 0.09460607800218794
          total_loss: 0.9166743063264423
          vf_explained_var: 0.018013732507824898
          vf_loss: 0.829921132264038
    num_agent_steps_sampled: 1492000
    num_agent_steps_trained: 1492000
    num_steps_sampled: 1492000
    num_steps_trained: 1492000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1492,38046.5,1492000,-96.563,-41.2,-100,965.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1493000
  custom_metrics: {}
  date: 2021-10-22_06-20-10
  done: false
  episode_len_mean: 964.38
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.4379999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4579
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.757750114086002e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.0211821761396196
          entropy_coeff: 0.009999999999999998
          kl: 0.06525926044973573
          policy_loss: 0.1961344364616606
          total_loss: 0.19053245700067944
          vf_explained_var: -0.16944684088230133
          vf_loss: 0.004609842616547313
    num_agent_steps_sampled: 1493000
    num_agent_steps_trained: 1493000
    num_steps_sampled: 1493000
    num_steps_trained: 149300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1493,38057.4,1493000,-96.438,-41.2,-100,964.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1494000
  custom_metrics: {}
  date: 2021-10-22_06-20-22
  done: false
  episode_len_mean: 964.18
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.4179999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4580
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6366251711290026e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7498553061650859
          entropy_coeff: 0.009999999999999998
          kl: 0.3202975632711677
          policy_loss: 0.1510233336024814
          total_loss: 0.1484919980996185
          vf_explained_var: -1.0
          vf_loss: 0.00496721781997217
    num_agent_steps_sampled: 1494000
    num_agent_steps_trained: 1494000
    num_steps_sampled: 1494000
    num_steps_trained: 1494000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1494,38068.8,1494000,-96.418,-41.2,-100,964.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1495000
  custom_metrics: {}
  date: 2021-10-22_06-20-32
  done: false
  episode_len_mean: 963.32
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.33199999999879
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4581
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9549377566935047e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.0002185298336876
          entropy_coeff: 0.009999999999999998
          kl: 0.012302140710384416
          policy_loss: -0.05399556193086836
          total_loss: 0.7865325783689817
          vf_explained_var: 0.00969680119305849
          vf_loss: 0.8505302995029423
    num_agent_steps_sampled: 1495000
    num_agent_steps_trained: 1495000
    num_steps_sampled: 1495000
    num_steps_trained: 14950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1495,38078.9,1495000,-96.332,-41.2,-100,963.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1496000
  custom_metrics: {}
  date: 2021-10-22_06-20-42
  done: false
  episode_len_mean: 962.43
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.24299999999879
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4582
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9549377566935047e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.927989266316096
          entropy_coeff: 0.009999999999999998
          kl: 0.011397796581653729
          policy_loss: -0.05154609696732627
          total_loss: 0.7909282949235704
          vf_explained_var: -0.011950627900660038
          vf_loss: 0.8517542911693454
    num_agent_steps_sampled: 1496000
    num_agent_steps_trained: 1496000
    num_steps_sampled: 1496000
    num_steps_trained: 1496

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1496,38088.9,1496000,-96.243,-41.2,-100,962.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1497000
  custom_metrics: {}
  date: 2021-10-22_06-20-52
  done: false
  episode_len_mean: 961.66
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.1659999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4583
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9549377566935047e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8194641490777334
          entropy_coeff: 0.009999999999999998
          kl: 0.0083359878646642
          policy_loss: -0.05301778697305255
          total_loss: 0.7866398377550973
          vf_explained_var: 0.004615990445017815
          vf_loss: 0.847852260660794
    num_agent_steps_sampled: 1497000
    num_agent_steps_trained: 1497000
    num_steps_sampled: 1497000
    num_steps_trained: 1497000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1497,38098.9,1497000,-96.166,-41.2,-100,961.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1498000
  custom_metrics: {}
  date: 2021-10-22_06-21-02
  done: false
  episode_len_mean: 961.14
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.1139999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4584
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9549377566935047e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8675460729334089
          entropy_coeff: 0.009999999999999998
          kl: 0.00440100267342258
          policy_loss: -0.055621848752101265
          total_loss: 0.7799019407894876
          vf_explained_var: 0.0021086540073156357
          vf_loss: 0.8441992443054914
    num_agent_steps_sampled: 1498000
    num_agent_steps_trained: 1498000
    num_steps_sampled: 1498000
    num_steps_trained: 1498

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1498,38108.9,1498000,-96.114,-41.2,-100,961.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1499000
  custom_metrics: {}
  date: 2021-10-22_06-21-11
  done: false
  episode_len_mean: 960.57
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.05699999999881
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4585
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9774688783467524e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8549577057361603
          entropy_coeff: 0.009999999999999998
          kl: 0.0062057252125997464
          policy_loss: -0.05373577078183492
          total_loss: 0.7859500942958726
          vf_explained_var: 0.0028176598716527224
          vf_loss: 0.8482354282918904
    num_agent_steps_sampled: 1499000
    num_agent_steps_trained: 1499000
    num_steps_sampled: 1499000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1499,38118.6,1499000,-96.057,-41.2,-100,960.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1500000
  custom_metrics: {}
  date: 2021-10-22_06-21-21
  done: false
  episode_len_mean: 960.16
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.01599999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4586
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9774688783467524e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8512114948696561
          entropy_coeff: 0.009999999999999998
          kl: 0.013087060838656978
          policy_loss: -0.045237152609560224
          total_loss: 0.7415038345588578
          vf_explained_var: 0.0032653524540364742
          vf_loss: 0.7952531057306462
    num_agent_steps_sampled: 1500000
    num_agent_steps_trained: 1500000
    num_steps_sampled: 1500000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1500,38128.4,1500000,-96.016,-41.2,-100,960.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1501000
  custom_metrics: {}
  date: 2021-10-22_06-21-31
  done: false
  episode_len_mean: 960.02
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -96.00199999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4587
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9774688783467524e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7534349852138096
          entropy_coeff: 0.009999999999999998
          kl: 0.036012038249012654
          policy_loss: -0.03236168796817462
          total_loss: 0.42455967937906586
          vf_explained_var: 0.001530942041426897
          vf_loss: 0.4644557191951511
    num_agent_steps_sampled: 1501000
    num_agent_steps_trained: 1501000
    num_steps_sampled: 1501000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1501,38137.9,1501000,-96.002,-41.2,-100,960.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1502000
  custom_metrics: {}
  date: 2021-10-22_06-21-41
  done: false
  episode_len_mean: 959.59
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.95899999999881
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4588
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.966203317520129e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7720306634902954
          entropy_coeff: 0.009999999999999998
          kl: 0.0011717943208056723
          policy_loss: -0.055361448062790766
          total_loss: 0.8140372099147902
          vf_explained_var: 0.004348163027316332
          vf_loss: 0.8771189585948984
    num_agent_steps_sampled: 1502000
    num_agent_steps_trained: 1502000
    num_steps_sampled: 1502000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1502,38148,1502000,-95.959,-41.2,-100,959.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1503000
  custom_metrics: {}
  date: 2021-10-22_06-21-51
  done: false
  episode_len_mean: 959.31
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.93099999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4589
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4831016587600644e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.697715323501163
          entropy_coeff: 0.009999999999999998
          kl: 0.009325649589432633
          policy_loss: -0.05865948266453213
          total_loss: 0.8132785985867182
          vf_explained_var: 0.0032941673416644335
          vf_loss: 0.8789152533022894
    num_agent_steps_sampled: 1503000
    num_agent_steps_trained: 1503000
    num_steps_sampled: 1503000
    num_steps_trained: 1503

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1503,38157.7,1503000,-95.931,-41.2,-100,959.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1504000
  custom_metrics: {}
  date: 2021-10-22_06-22-18
  done: false
  episode_len_mean: 957.69
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.76899999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4590
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4831016587600644e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.5794207712014516
          entropy_coeff: 0.009999999999999998
          kl: 0.01177005257260867
          policy_loss: -0.059595794644620684
          total_loss: 0.8090768100486861
          vf_explained_var: 0.0035095757339149714
          vf_loss: 0.8744668073124355
    num_agent_steps_sampled: 1504000
    num_agent_steps_trained: 1504000
    num_steps_sampled: 1504000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1504,38185.5,1504000,-95.769,-41.2,-100,957.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1505000
  custom_metrics: {}
  date: 2021-10-22_06-22-28
  done: false
  episode_len_mean: 957.34
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.73399999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4591
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4831016587600644e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.5572349035077625
          entropy_coeff: 0.009999999999999998
          kl: 0.008753015629460808
          policy_loss: -0.05100257760948605
          total_loss: 0.811773074666659
          vf_explained_var: 0.0051551396027207375
          vf_loss: 0.868348007152478
    num_agent_steps_sampled: 1505000
    num_agent_steps_trained: 1505000
    num_steps_sampled: 1505000
    num_steps_trained: 15050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1505,38195.2,1505000,-95.734,-41.2,-100,957.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1506000
  custom_metrics: {}
  date: 2021-10-22_06-22-38
  done: false
  episode_len_mean: 957.04
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.70399999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4592
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4831016587600644e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.48189437521828543
          entropy_coeff: 0.009999999999999998
          kl: 0.003836760386838023
          policy_loss: -0.0564313777618938
          total_loss: 0.8071066973937883
          vf_explained_var: 0.005581165663897991
          vf_loss: 0.8683570110342569
    num_agent_steps_sampled: 1506000
    num_agent_steps_trained: 1506000
    num_steps_sampled: 1506000
    num_steps_trained: 1506

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1506,38205.2,1506000,-95.704,-41.2,-100,957.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1507000
  custom_metrics: {}
  date: 2021-10-22_06-22-48
  done: false
  episode_len_mean: 957.04
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.70399999999881
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.415508293800322e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.5010612265931236
          entropy_coeff: 0.009999999999999998
          kl: 0.004821941734489086
          policy_loss: -0.05534032632907231
          total_loss: 0.8087898254394531
          vf_explained_var: 0.005658735055476427
          vf_loss: 0.8691407536052995
    num_agent_steps_sampled: 1507000
    num_agent_steps_trained: 1507000
    num_steps_sampled: 1507000
    num_steps_trained: 15070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1507,38215,1507000,-95.704,-41.2,-100,957.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1508000
  custom_metrics: {}
  date: 2021-10-22_06-22-58
  done: false
  episode_len_mean: 956.95
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.69499999999881
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4594
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.707754146900161e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.5352031389872233
          entropy_coeff: 0.009999999999999998
          kl: 0.004722889150988547
          policy_loss: -0.05614487263891432
          total_loss: 0.8031641662120819
          vf_explained_var: -0.018561329692602158
          vf_loss: 0.8646610552858975
    num_agent_steps_sampled: 1508000
    num_agent_steps_trained: 1508000
    num_steps_sampled: 1508000
    num_steps_trained: 1508

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1508,38225.2,1508000,-95.695,-41.2,-100,956.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1509000
  custom_metrics: {}
  date: 2021-10-22_06-23-08
  done: false
  episode_len_mean: 956.95
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.6949999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4595
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8538770734500805e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.5389402267005708
          entropy_coeff: 0.009999999999999998
          kl: 0.003501819568561615
          policy_loss: -0.05353629340728124
          total_loss: 0.8035790433486303
          vf_explained_var: 0.005705735646188259
          vf_loss: 0.8625047476962209
    num_agent_steps_sampled: 1509000
    num_agent_steps_trained: 1509000
    num_steps_sampled: 1509000
    num_steps_trained: 15090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1509,38234.9,1509000,-95.695,-41.2,-100,956.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1510000
  custom_metrics: {}
  date: 2021-10-22_06-23-18
  done: false
  episode_len_mean: 956.76
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.67599999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4596
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.269385367250403e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.5311014228396945
          entropy_coeff: 0.009999999999999998
          kl: 0.0028238834860406965
          policy_loss: -0.05377903547551897
          total_loss: 0.805493005282349
          vf_explained_var: 0.003483645850792527
          vf_loss: 0.8645830616768864
    num_agent_steps_sampled: 1510000
    num_agent_steps_trained: 1510000
    num_steps_sampled: 1510000
    num_steps_trained: 15100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1510,38245,1510000,-95.676,-41.2,-100,956.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1511000
  custom_metrics: {}
  date: 2021-10-22_06-23-28
  done: false
  episode_len_mean: 956.66
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.66599999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4597
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.634692683625201e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.48017626570330724
          entropy_coeff: 0.009999999999999998
          kl: 0.006703107824936675
          policy_loss: -0.056048813296688926
          total_loss: 0.8018272573749224
          vf_explained_var: 0.0039816563948988914
          vf_loss: 0.8626778301886386
    num_agent_steps_sampled: 1511000
    num_agent_steps_trained: 1511000
    num_steps_sampled: 1511000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1511,38254.6,1511000,-95.666,-41.2,-100,956.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1512000
  custom_metrics: {}
  date: 2021-10-22_06-23-38
  done: false
  episode_len_mean: 956.6
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.6599999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4598
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.634692683625201e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.4411786029736201
          entropy_coeff: 0.009999999999999998
          kl: 0.0027220200226505792
          policy_loss: -0.05338757865958744
          total_loss: 0.8072150727113088
          vf_explained_var: 0.0033468068577349186
          vf_loss: 0.8650144304873215
    num_agent_steps_sampled: 1512000
    num_agent_steps_trained: 1512000
    num_steps_sampled: 1512000
    num_steps_trained: 15120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1512,38264.7,1512000,-95.66,-41.2,-100,956.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1513000
  custom_metrics: {}
  date: 2021-10-22_06-23-47
  done: false
  episode_len_mean: 956.56
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.6559999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4599
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3173463418126006e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.38314375281333923
          entropy_coeff: 0.009999999999999998
          kl: 0.0030394553157021974
          policy_loss: -0.054217180278566146
          total_loss: 0.805531041820844
          vf_explained_var: 0.004108671564608812
          vf_loss: 0.8635796759484543
    num_agent_steps_sampled: 1513000
    num_agent_steps_trained: 1513000
    num_steps_sampled: 1513000
    num_steps_trained: 151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1513,38274.3,1513000,-95.656,-41.2,-100,956.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1514000
  custom_metrics: {}
  date: 2021-10-22_06-23-58
  done: false
  episode_len_mean: 957.87
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.78699999999878
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4600
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1586731709063003e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.37010022269354925
          entropy_coeff: 0.009999999999999998
          kl: 0.00010151693983960147
          policy_loss: -0.05162769224908617
          total_loss: 0.8069479134347703
          vf_explained_var: 0.004601169843226671
          vf_loss: 0.8622766074414054
    num_agent_steps_sampled: 1514000
    num_agent_steps_trained: 1514000
    num_steps_sampled: 1514000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1514,38284.4,1514000,-95.787,-41.2,-100,957.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1515000
  custom_metrics: {}
  date: 2021-10-22_06-24-07
  done: false
  episode_len_mean: 957.76
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.77599999999879
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4601
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.793365854531502e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.4122806641790602
          entropy_coeff: 0.009999999999999998
          kl: 0.0026326906815914777
          policy_loss: -0.050616522464487286
          total_loss: 0.8038929765423138
          vf_explained_var: 0.0047631654888391495
          vf_loss: 0.8586323014563985
    num_agent_steps_sampled: 1515000
    num_agent_steps_trained: 1515000
    num_steps_sampled: 1515000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1515,38294.1,1515000,-95.776,-41.2,-100,957.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1516000
  custom_metrics: {}
  date: 2021-10-22_06-24-17
  done: false
  episode_len_mean: 957.73
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.77299999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4602
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.896682927265751e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.27065049923128554
          entropy_coeff: 0.009999999999999998
          kl: 0.025059685088167624
          policy_loss: -0.053401602804660796
          total_loss: 0.809190250436465
          vf_explained_var: 0.004125352017581463
          vf_loss: 0.8652983494516876
    num_agent_steps_sampled: 1516000
    num_agent_steps_trained: 1516000
    num_steps_sampled: 1516000
    num_steps_trained: 1516

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1516,38304.2,1516000,-95.773,-41.2,-100,957.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1517000
  custom_metrics: {}
  date: 2021-10-22_06-24-27
  done: false
  episode_len_mean: 957.61
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.7609999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4603
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345024390898627e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.19914345956510968
          entropy_coeff: 0.009999999999999998
          kl: 0.010177005825823215
          policy_loss: -0.04557796153757307
          total_loss: 0.791251636048158
          vf_explained_var: 0.0048344312235713005
          vf_loss: 0.8388210246960323
    num_agent_steps_sampled: 1517000
    num_agent_steps_trained: 1517000
    num_steps_sampled: 1517000
    num_steps_trained: 15170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1517,38314,1517000,-95.761,-41.2,-100,957.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1518000
  custom_metrics: {}
  date: 2021-10-22_06-24-37
  done: false
  episode_len_mean: 957.54
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.75399999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4604
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345024390898627e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.16401406410667632
          entropy_coeff: 0.009999999999999998
          kl: 0.0010471592834626435
          policy_loss: -0.03769410534037484
          total_loss: 0.7407627544469304
          vf_explained_var: 0.0037851869128644466
          vf_loss: 0.7800969948164291
    num_agent_steps_sampled: 1518000
    num_agent_steps_trained: 1518000
    num_steps_sampled: 1518000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1518,38324.2,1518000,-95.754,-41.2,-100,957.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1519000
  custom_metrics: {}
  date: 2021-10-22_06-24-47
  done: false
  episode_len_mean: 957.43
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.74299999999882
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 1
  episodes_total: 4605
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1725121954493134e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.13810266132156054
          entropy_coeff: 0.009999999999999998
          kl: 0.0015088250584540219
          policy_loss: -0.022553326934576033
          total_loss: 0.4524717821015252
          vf_explained_var: 0.003173102857545018
          vf_loss: 0.47640613423944966
    num_agent_steps_sampled: 1519000
    num_agent_steps_trained: 1519000
    num_steps_sampled: 1519000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1519,38334,1519000,-95.743,-41.2,-100,957.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1520000
  custom_metrics: {}
  date: 2021-10-22_06-24-58
  done: false
  episode_len_mean: 957.33
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -95.7329999999988
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4607
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.2148801484869586
          entropy_coeff: 0.009999999999999998
          kl: 0.019974888922221604
          policy_loss: 0.15701795717080433
          total_loss: 0.3600059731139077
          vf_explained_var: 0.0034620582591742277
          vf_loss: 0.20513681962685143
    num_agent_steps_sampled: 1520000
    num_agent_steps_trained: 1520000
    num_steps_sampled: 1520000
    num_steps_trained: 1520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1520,38344.3,1520000,-95.733,-41.2,-100,957.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1521000
  custom_metrics: {}
  date: 2021-10-22_06-25-15
  done: false
  episode_len_mean: 946.98
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -94.69799999999883
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4609
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9444542460971408
          entropy_coeff: 0.009999999999999998
          kl: 0.008915675051621986
          policy_loss: 0.09155758337842093
          total_loss: 0.9977557086282306
          vf_explained_var: -0.23254722356796265
          vf_loss: 0.9156426461413503
    num_agent_steps_sampled: 1521000
    num_agent_steps_trained: 1521000
    num_steps_sampled: 1521000
    num_steps_trained: 15210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1521,38361.4,1521000,-94.698,-41.2,-100,946.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1522000
  custom_metrics: {}
  date: 2021-10-22_06-25-32
  done: false
  episode_len_mean: 936.16
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -93.61599999999888
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4611
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.0680116481251187
          entropy_coeff: 0.009999999999999998
          kl: 0.017791889715911995
          policy_loss: 0.10435496386554506
          total_loss: 0.9392125093274646
          vf_explained_var: 0.007979992777109146
          vf_loss: 0.8455376640790039
    num_agent_steps_sampled: 1522000
    num_agent_steps_trained: 1522000
    num_steps_sampled: 1522000
    num_steps_trained: 15220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1522,38379,1522000,-93.616,-41.2,-100,936.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1523000
  custom_metrics: {}
  date: 2021-10-22_06-25-50
  done: false
  episode_len_mean: 925.19
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -92.51899999999891
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4613
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8758690761195289
          entropy_coeff: 0.009999999999999998
          kl: 0.011765003059255402
          policy_loss: -0.051951199769973755
          total_loss: 1.3750849614540737
          vf_explained_var: 0.008901776745915413
          vf_loss: 1.4357948521359098
    num_agent_steps_sampled: 1523000
    num_agent_steps_trained: 1523000
    num_steps_sampled: 1523000
    num_steps_trained: 152

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1523,38396.9,1523000,-92.519,-41.2,-100,925.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1524000
  custom_metrics: {}
  date: 2021-10-22_06-26-07
  done: false
  episode_len_mean: 914.32
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -91.43199999999894
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4615
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8406532287597657
          entropy_coeff: 0.009999999999999998
          kl: 0.012053457477894528
          policy_loss: -0.07608632693688075
          total_loss: 1.4496381630500157
          vf_explained_var: -0.2920800745487213
          vf_loss: 1.5341309994045231
    num_agent_steps_sampled: 1524000
    num_agent_steps_trained: 1524000
    num_steps_sampled: 1524000
    num_steps_trained: 15240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1524,38413.1,1524000,-91.432,-41.2,-100,914.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1525000
  custom_metrics: {}
  date: 2021-10-22_06-26-24
  done: false
  episode_len_mean: 903.96
  episode_media: {}
  episode_reward_max: -41.200000000000315
  episode_reward_mean: -90.39599999999899
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4617
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8930479619238112
          entropy_coeff: 0.009999999999999998
          kl: 0.011987212374538102
          policy_loss: -0.08986799816290537
          total_loss: 1.481577111615075
          vf_explained_var: 0.021547846496105194
          vf_loss: 1.5803755892647637
    num_agent_steps_sampled: 1525000
    num_agent_steps_trained: 1525000
    num_steps_sampled: 1525000
    num_steps_trained: 15250

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1525,38430.2,1525000,-90.396,-41.2,-100,903.96




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1526000
  custom_metrics: {}
  date: 2021-10-22_06-27-00
  done: false
  episode_len_mean: 886.08
  episode_media: {}
  episode_reward_max: -35.20000000000023
  episode_reward_mean: -88.60799999999902
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4620
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8954521695772807
          entropy_coeff: 0.009999999999999998
          kl: 0.005767059959472616
          policy_loss: 0.050530143909984164
          total_loss: 1.5511960632271238
          vf_explained_var: -0.08642780780792236
          vf_loss: 1.509620423283842
    num_agent_steps_sampled: 1526000
    num_agent_steps_trained: 1526000
    num_steps_sampled: 1526000
    num_steps_trained: 152600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1526,38466.1,1526000,-88.608,-35.2,-100,886.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1527000
  custom_metrics: {}
  date: 2021-10-22_06-27-19
  done: false
  episode_len_mean: 874.84
  episode_media: {}
  episode_reward_max: -35.20000000000023
  episode_reward_mean: -87.48399999999907
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4622
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.910494593779246
          entropy_coeff: 0.009999999999999998
          kl: 0.008402982052445572
          policy_loss: -0.09132907738288244
          total_loss: 1.3767875485950045
          vf_explained_var: 0.028943825513124466
          vf_loss: 1.4772215978966818
    num_agent_steps_sampled: 1527000
    num_agent_steps_trained: 1527000
    num_steps_sampled: 1527000
    num_steps_trained: 152700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1527,38485.8,1527000,-87.484,-35.2,-100,874.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1528000
  custom_metrics: {}
  date: 2021-10-22_06-27-38
  done: false
  episode_len_mean: 864.05
  episode_media: {}
  episode_reward_max: -35.20000000000023
  episode_reward_mean: -86.4049999999991
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4624
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862560977246567e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9817086809211307
          entropy_coeff: 0.009999999999999998
          kl: 0.026862055152689537
          policy_loss: -0.07370687822500864
          total_loss: 1.2137503196795782
          vf_explained_var: -0.12335262447595596
          vf_loss: 1.29727432106932
    num_agent_steps_sampled: 1528000
    num_agent_steps_trained: 1528000
    num_steps_sampled: 1528000
    num_steps_trained: 1528000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1528,38504.5,1528000,-86.405,-35.2,-100,864.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1529000
  custom_metrics: {}
  date: 2021-10-22_06-27-57
  done: false
  episode_len_mean: 846.25
  episode_media: {}
  episode_reward_max: -35.20000000000023
  episode_reward_mean: -84.62499999999915
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4627
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9744044966167874
          entropy_coeff: 0.009999999999999998
          kl: 0.010851691643918256
          policy_loss: 0.04504847377538681
          total_loss: 1.44159487552113
          vf_explained_var: -0.06943266838788986
          vf_loss: 1.4062904421654012
    num_agent_steps_sampled: 1529000
    num_agent_steps_trained: 1529000
    num_steps_sampled: 1529000
    num_steps_trained: 1529000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1529,38524,1529000,-84.625,-35.2,-100,846.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1530000
  custom_metrics: {}
  date: 2021-10-22_06-28-18
  done: false
  episode_len_mean: 833.78
  episode_media: {}
  episode_reward_max: -35.20000000000023
  episode_reward_mean: -83.37799999999918
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 2
  episodes_total: 4629
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.0053758594724866
          entropy_coeff: 0.009999999999999998
          kl: 0.01346532250831716
          policy_loss: -0.08100508070654339
          total_loss: 1.291109835439258
          vf_explained_var: 0.02160497196018696
          vf_loss: 1.3821686651971605
    num_agent_steps_sampled: 1530000
    num_agent_steps_trained: 1530000
    num_steps_sampled: 1530000
    num_steps_trained: 1530000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1530,38544.5,1530000,-83.378,-35.2,-100,833.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1531000
  custom_metrics: {}
  date: 2021-10-22_06-28-41
  done: false
  episode_len_mean: 808.64
  episode_media: {}
  episode_reward_max: -31.500000000000178
  episode_reward_mean: -80.86399999999925
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4633
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9774024413691627
          entropy_coeff: 0.009999999999999998
          kl: 0.010948792808043587
          policy_loss: 0.028390241952406036
          total_loss: 1.7272524582015143
          vf_explained_var: 0.02813884988427162
          vf_loss: 1.7086362481117248
    num_agent_steps_sampled: 1531000
    num_agent_steps_trained: 1531000
    num_steps_sampled: 1531000
    num_steps_trained: 153100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1531,38567.2,1531000,-80.864,-31.5,-100,808.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1532000
  custom_metrics: {}
  date: 2021-10-22_06-29-05
  done: false
  episode_len_mean: 787.73
  episode_media: {}
  episode_reward_max: -29.700000000000152
  episode_reward_mean: -78.77299999999929
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4636
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8810454229513804
          entropy_coeff: 0.009999999999999998
          kl: 0.015480368561588244
          policy_loss: 0.04322832218474812
          total_loss: 1.3544659578137928
          vf_explained_var: 0.05077444761991501
          vf_loss: 1.3200481060478422
    num_agent_steps_sampled: 1532000
    num_agent_steps_trained: 1532000
    num_steps_sampled: 1532000
    num_steps_trained: 1532000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1532,38591.6,1532000,-78.773,-29.7,-100,787.73


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1533000
  custom_metrics: {}
  date: 2021-10-22_06-29-32
  done: false
  episode_len_mean: 766.18
  episode_media: {}
  episode_reward_max: -28.000000000000128
  episode_reward_mean: -76.61799999999934
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4639
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8479995965957642
          entropy_coeff: 0.009999999999999998
          kl: 0.009267439906951027
          policy_loss: -0.10947388104266591
          total_loss: 1.7193721930185955
          vf_explained_var: 0.07138009369373322
          vf_loss: 1.8373260643747118
    num_agent_steps_sampled: 1533000
    num_agent_steps_trained: 1533000
    num_steps_sampled: 1533000
    num_steps_trained: 153300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1533,38618.3,1533000,-76.618,-28,-100,766.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1534000
  custom_metrics: {}
  date: 2021-10-22_06-29-59
  done: false
  episode_len_mean: 736.74
  episode_media: {}
  episode_reward_max: -25.800000000000097
  episode_reward_mean: -73.67399999999941
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4643
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8835176116890378
          entropy_coeff: 0.009999999999999998
          kl: 0.016789022712095578
          policy_loss: -0.020930707289112938
          total_loss: 1.5520028193791708
          vf_explained_var: 0.145319402217865
          vf_loss: 1.5817687074343363
    num_agent_steps_sampled: 1534000
    num_agent_steps_trained: 1534000
    num_steps_sampled: 1534000
    num_steps_trained: 1534000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1534,38645.5,1534000,-73.674,-25.8,-100,736.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1535000
  custom_metrics: {}
  date: 2021-10-22_06-30-27
  done: false
  episode_len_mean: 707.02
  episode_media: {}
  episode_reward_max: -25.500000000000092
  episode_reward_mean: -70.70199999999947
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4647
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9178262498643663
          entropy_coeff: 0.009999999999999998
          kl: 0.005141993796422481
          policy_loss: -0.019687280886703067
          total_loss: 1.640933585166931
          vf_explained_var: 0.07484013587236404
          vf_loss: 1.669799100028144
    num_agent_steps_sampled: 1535000
    num_agent_steps_trained: 1535000
    num_steps_sampled: 1535000
    num_steps_trained: 1535000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1535,38672.9,1535000,-70.702,-25.5,-100,707.02




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1536000
  custom_metrics: {}
  date: 2021-10-22_06-31-12
  done: false
  episode_len_mean: 677.34
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -67.73399999999953
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4651
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9123626920912001
          entropy_coeff: 0.009999999999999998
          kl: 0.005176263574579012
          policy_loss: 0.009914638350407283
          total_loss: 1.632187881734636
          vf_explained_var: 0.051065847277641296
          vf_loss: 1.6313968631956313
    num_agent_steps_sampled: 1536000
    num_agent_steps_trained: 1536000
    num_steps_sampled: 1536000
    num_steps_trained: 1536000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1536,38718.1,1536000,-67.734,-22.5,-100,677.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1537000
  custom_metrics: {}
  date: 2021-10-22_06-31-39
  done: false
  episode_len_mean: 648.15
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -64.81499999999959
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4655
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8153245912657844
          entropy_coeff: 0.009999999999999998
          kl: 0.008476427864802518
          policy_loss: 0.012288824386066861
          total_loss: 1.6086293604638842
          vf_explained_var: 0.04457239434123039
          vf_loss: 1.6044938153690762
    num_agent_steps_sampled: 1537000
    num_agent_steps_trained: 1537000
    num_steps_sampled: 1537000
    num_steps_trained: 1537000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1537,38744.9,1537000,-64.815,-22.5,-100,648.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1538000
  custom_metrics: {}
  date: 2021-10-22_06-32-07
  done: false
  episode_len_mean: 618.46
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -61.84599999999964
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4659
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.629384146586985e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8010357896486918
          entropy_coeff: 0.009999999999999998
          kl: 0.004924526026261194
          policy_loss: 0.014963543166716893
          total_loss: 1.5945904042985704
          vf_explained_var: 0.03516155853867531
          vf_loss: 1.5876372151904636
    num_agent_steps_sampled: 1538000
    num_agent_steps_trained: 1538000
    num_steps_sampled: 1538000
    num_steps_trained: 1538000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1538,38773.6,1538000,-61.846,-22.5,-100,618.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1539000
  custom_metrics: {}
  date: 2021-10-22_06-32-36
  done: false
  episode_len_mean: 597.61
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -59.760999999999655
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4663
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.146920732934925e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8298536565568712
          entropy_coeff: 0.009999999999999998
          kl: 0.0055398312943402235
          policy_loss: 0.016264060015479722
          total_loss: 1.5621870716412862
          vf_explained_var: 0.02887171134352684
          vf_loss: 1.5542215559217665
    num_agent_steps_sampled: 1539000
    num_agent_steps_trained: 1539000
    num_steps_sampled: 1539000
    num_steps_trained: 15390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1539,38802.2,1539000,-59.761,-22.5,-100,597.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1540000
  custom_metrics: {}
  date: 2021-10-22_06-33-05
  done: false
  episode_len_mean: 586.11
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -58.61099999999967
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 3
  episodes_total: 4666
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.146920732934925e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8160416338178846
          entropy_coeff: 0.009999999999999998
          kl: 0.007051099305834659
          policy_loss: -0.0952845048573282
          total_loss: 1.479225410355462
          vf_explained_var: 0.019245240837335587
          vf_loss: 1.5826703283521864
    num_agent_steps_sampled: 1540000
    num_agent_steps_trained: 1540000
    num_steps_sampled: 1540000
    num_steps_trained: 1540000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1540,38831.2,1540000,-58.611,-22.5,-100,586.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1541000
  custom_metrics: {}
  date: 2021-10-22_06-33-33
  done: false
  episode_len_mean: 557.63
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -55.76299999999972
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4670
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.146920732934925e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.825541948609882
          entropy_coeff: 0.009999999999999998
          kl: 0.00394651459484218
          policy_loss: -0.06072347395949894
          total_loss: 1.5209882921642728
          vf_explained_var: 0.03727732598781586
          vf_loss: 1.5899671607547337
    num_agent_steps_sampled: 1541000
    num_agent_steps_trained: 1541000
    num_steps_sampled: 1541000
    num_steps_trained: 1541000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1541,38859.6,1541000,-55.763,-22.5,-100,557.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1542000
  custom_metrics: {}
  date: 2021-10-22_06-34-03
  done: false
  episode_len_mean: 533.13
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -53.31299999999974
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4674
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0734603664674624e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7232258637746175
          entropy_coeff: 0.009999999999999998
          kl: 0.008436297972148388
          policy_loss: -0.052547203790810376
          total_loss: 1.5691546453369989
          vf_explained_var: 0.03240939974784851
          vf_loss: 1.6289341304037306
    num_agent_steps_sampled: 1542000
    num_agent_steps_trained: 1542000
    num_steps_sampled: 1542000
    num_steps_trained: 15420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1542,38889,1542000,-53.313,-22.5,-100,533.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1543000
  custom_metrics: {}
  date: 2021-10-22_06-34-32
  done: false
  episode_len_mean: 507.99
  episode_media: {}
  episode_reward_max: -22.50000000000005
  episode_reward_mean: -50.79899999999977
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4678
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0734603664674624e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7029352909988827
          entropy_coeff: 0.009999999999999998
          kl: 0.00221788268434769
          policy_loss: -0.10671921078529623
          total_loss: 1.8288284579912821
          vf_explained_var: 0.032875046133995056
          vf_loss: 1.9425770190027025
    num_agent_steps_sampled: 1543000
    num_agent_steps_trained: 1543000
    num_steps_sampled: 1543000
    num_steps_trained: 154300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1543,38918.5,1543000,-50.799,-22.5,-100,507.99




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1544000
  custom_metrics: {}
  date: 2021-10-22_06-35-20
  done: false
  episode_len_mean: 473.92
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -47.391999999999825
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 5
  episodes_total: 4683
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0367301832337312e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7216031584474776
          entropy_coeff: 0.009999999999999998
          kl: 0.0053055519730409945
          policy_loss: 0.005411954224109649
          total_loss: 1.7140603144963582
          vf_explained_var: 0.05698899179697037
          vf_loss: 1.7158643868234422
    num_agent_steps_sampled: 1544000
    num_agent_steps_trained: 1544000
    num_steps_sampled: 1544000
    num_steps_trained: 1544

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1544,38965.8,1544000,-47.392,-21,-100,473.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1545000
  custom_metrics: {}
  date: 2021-10-22_06-35-49
  done: false
  episode_len_mean: 445.41
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -44.54099999999987
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4687
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0367301832337312e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7075567490524716
          entropy_coeff: 0.009999999999999998
          kl: 0.002372866242482827
          policy_loss: 0.04037580846084489
          total_loss: 1.3003817094696892
          vf_explained_var: 0.03714446350932121
          vf_loss: 1.267081473271052
    num_agent_steps_sampled: 1545000
    num_agent_steps_trained: 1545000
    num_steps_sampled: 1545000
    num_steps_trained: 1545000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1545,38995.4,1545000,-44.541,-21,-100,445.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1546000
  custom_metrics: {}
  date: 2021-10-22_06-36-20
  done: false
  episode_len_mean: 417.76
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -41.775999999999925
  episode_reward_min: -99.9999999999986
  episodes_this_iter: 4
  episodes_total: 4691
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0183650916168656e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.6952537331316206
          entropy_coeff: 0.009999999999999998
          kl: 0.002900870964605594
          policy_loss: 0.021258960001998478
          total_loss: 1.5159585303730434
          vf_explained_var: 0.03141069412231445
          vf_loss: 1.5016520897547403
    num_agent_steps_sampled: 1546000
    num_agent_steps_trained: 1546000
    num_steps_sampled: 1546000
    num_steps_trained: 15460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1546,39025.7,1546000,-41.776,-21,-100,417.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1547000
  custom_metrics: {}
  date: 2021-10-22_06-36-49
  done: false
  episode_len_mean: 387.92
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -38.79199999999999
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 4
  episodes_total: 4695
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.091825458084328e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.628119965394338
          entropy_coeff: 0.009999999999999998
          kl: 0.006684033828200755
          policy_loss: 0.014945813351207309
          total_loss: 1.512859903441535
          vf_explained_var: 0.04485289379954338
          vf_loss: 1.5041952927907307
    num_agent_steps_sampled: 1547000
    num_agent_steps_trained: 1547000
    num_steps_sampled: 1547000
    num_steps_trained: 1547000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1547,39055.6,1547000,-38.792,-21,-99.7,387.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1548000
  custom_metrics: {}
  date: 2021-10-22_06-37-20
  done: false
  episode_len_mean: 357.97
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -35.79700000000004
  episode_reward_min: -99.69999999999861
  episodes_this_iter: 4
  episodes_total: 4699
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.091825458084328e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.636156764295366
          entropy_coeff: 0.009999999999999998
          kl: 0.0037007747702679735
          policy_loss: 0.0253553445968363
          total_loss: 1.466913104057312
          vf_explained_var: 0.04783882200717926
          vf_loss: 1.4479193502002292
    num_agent_steps_sampled: 1548000
    num_agent_steps_trained: 1548000
    num_steps_sampled: 1548000
    num_steps_trained: 1548000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1548,39085.7,1548000,-35.797,-21,-99.7,357.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1549000
  custom_metrics: {}
  date: 2021-10-22_06-37-50
  done: false
  episode_len_mean: 327.9
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -32.7900000000001
  episode_reward_min: -99.59999999999862
  episodes_this_iter: 4
  episodes_total: 4703
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.545912729042164e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5948052492406634
          entropy_coeff: 0.009999999999999998
          kl: 0.003747181386041929
          policy_loss: -0.02668561968538496
          total_loss: 1.4574761523140802
          vf_explained_var: 0.06116404011845589
          vf_loss: 1.490109810564253
    num_agent_steps_sampled: 1549000
    num_agent_steps_trained: 1549000
    num_steps_sampled: 1549000
    num_steps_trained: 1549000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1549,39116.4,1549000,-32.79,-21,-99.6,327.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1550000
  custom_metrics: {}
  date: 2021-10-22_06-38-20
  done: false
  episode_len_mean: 297.91
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.79100000000015
  episode_reward_min: -50.80000000000045
  episodes_this_iter: 4
  episodes_total: 4707
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.272956364521082e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6274815128909217
          entropy_coeff: 0.009999999999999998
          kl: 0.004857301195741807
          policy_loss: -0.03281154235204061
          total_loss: 1.451660266187456
          vf_explained_var: 0.06330312043428421
          vf_loss: 1.4907466411590575
    num_agent_steps_sampled: 1550000
    num_agent_steps_trained: 1550000
    num_steps_sampled: 1550000
    num_steps_trained: 1550000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1550,39145.8,1550000,-29.791,-21,-50.8,297.91




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1551000
  custom_metrics: {}
  date: 2021-10-22_06-39-08
  done: false
  episode_len_mean: 286.39
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -28.63900000000014
  episode_reward_min: -50.80000000000045
  episodes_this_iter: 5
  episodes_total: 4712
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.36478182260541e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6064611766073439
          entropy_coeff: 0.009999999999999998
          kl: 0.00974573149524171
          policy_loss: -0.021938617610269124
          total_loss: 1.813777056005266
          vf_explained_var: 0.07801937311887741
          vf_loss: 1.8417802731196085
    num_agent_steps_sampled: 1551000
    num_agent_steps_trained: 1551000
    num_steps_sampled: 1551000
    num_steps_trained: 1551000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1551,39193.9,1551000,-28.639,-20.5,-50.8,286.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1552000
  custom_metrics: {}
  date: 2021-10-22_06-39-38
  done: false
  episode_len_mean: 277.46
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -27.74600000000013
  episode_reward_min: -47.600000000000406
  episodes_this_iter: 4
  episodes_total: 4716
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.36478182260541e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6665001147323184
          entropy_coeff: 0.009999999999999998
          kl: 0.06346974367045292
          policy_loss: 0.016120358639293248
          total_loss: 1.2894135077794393
          vf_explained_var: 0.21807189285755157
          vf_loss: 1.2799581474728055
    num_agent_steps_sampled: 1552000
    num_agent_steps_trained: 1552000
    num_steps_sampled: 1552000
    num_steps_trained: 1552000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1552,39224,1552000,-27.746,-20.5,-47.6,277.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1553000
  custom_metrics: {}
  date: 2021-10-22_06-40-09
  done: false
  episode_len_mean: 270.29
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -27.02900000000012
  episode_reward_min: -47.600000000000406
  episodes_this_iter: 4
  episodes_total: 4720
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.547172733908113e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5439526345994737
          entropy_coeff: 0.009999999999999998
          kl: 0.0032550979924379792
          policy_loss: 0.03862657414542304
          total_loss: 1.2296179705195958
          vf_explained_var: 0.05436406284570694
          vf_loss: 1.1964309301641252
    num_agent_steps_sampled: 1553000
    num_agent_steps_trained: 1553000
    num_steps_sampled: 1553000
    num_steps_trained: 15530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1553,39254.9,1553000,-27.029,-20.5,-47.6,270.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1554000
  custom_metrics: {}
  date: 2021-10-22_06-40-40
  done: false
  episode_len_mean: 261.8
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -26.180000000000106
  episode_reward_min: -42.80000000000034
  episodes_this_iter: 4
  episodes_total: 4724
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7735863669540564e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5340080367194282
          entropy_coeff: 0.009999999999999998
          kl: 0.004682664091692838
          policy_loss: 0.03875775138537089
          total_loss: 1.052824385298623
          vf_explained_var: 0.09444793313741684
          vf_loss: 1.0194067075848579
    num_agent_steps_sampled: 1554000
    num_agent_steps_trained: 1554000
    num_steps_sampled: 1554000
    num_steps_trained: 1554000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1554,39285.5,1554000,-26.18,-20.5,-42.8,261.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1555000
  custom_metrics: {}
  date: 2021-10-22_06-41-10
  done: false
  episode_len_mean: 255.39
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.539000000000087
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 4728
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3867931834770282e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5208535267247094
          entropy_coeff: 0.009999999999999998
          kl: 0.015210007780173227
          policy_loss: -0.004643225007587009
          total_loss: 1.3275220519966549
          vf_explained_var: 0.15844953060150146
          vf_loss: 1.337373814980189
    num_agent_steps_sampled: 1555000
    num_agent_steps_trained: 1555000
    num_steps_sampled: 1555000
    num_steps_trained: 155

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1555,39315.8,1555000,-25.539,-20.5,-37.6,255.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1556000
  custom_metrics: {}
  date: 2021-10-22_06-41-40
  done: false
  episode_len_mean: 251.01
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.101000000000084
  episode_reward_min: -31.500000000000178
  episodes_this_iter: 4
  episodes_total: 4732
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3867931834770282e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5298076159424252
          entropy_coeff: 0.009999999999999998
          kl: 0.019529331527556437
          policy_loss: -0.10813970838983854
          total_loss: 1.659410097532802
          vf_explained_var: 0.24517260491847992
          vf_loss: 1.7728478925095663
    num_agent_steps_sampled: 1556000
    num_agent_steps_trained: 1556000
    num_steps_sampled: 1556000
    num_steps_trained: 1556

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1556,39345.8,1556000,-25.101,-20.5,-31.5,251.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1557000
  custom_metrics: {}
  date: 2021-10-22_06-42-10
  done: false
  episode_len_mean: 249.0
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.900000000000087
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 4
  episodes_total: 4736
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3867931834770282e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6131985349787606
          entropy_coeff: 0.009999999999999998
          kl: 0.0253988507203543
          policy_loss: -0.01098321634862158
          total_loss: 1.277419666780366
          vf_explained_var: -0.14824241399765015
          vf_loss: 1.2945348569088513
    num_agent_steps_sampled: 1557000
    num_agent_steps_trained: 1557000
    num_steps_sampled: 1557000
    num_steps_trained: 155700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1557,39376.2,1557000,-24.9,-20.5,-29.4,249




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1558000
  custom_metrics: {}
  date: 2021-10-22_06-42-59
  done: false
  episode_len_mean: 247.0
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.700000000000077
  episode_reward_min: -29.400000000000148
  episodes_this_iter: 5
  episodes_total: 4741
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5801897752155437e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5866368879874547
          entropy_coeff: 0.009999999999999998
          kl: 0.008299981581050513
          policy_loss: -0.0027453964783085718
          total_loss: 1.7960133062468635
          vf_explained_var: 0.20482711493968964
          vf_loss: 1.8046250694327883
    num_agent_steps_sampled: 1558000
    num_agent_steps_trained: 1558000
    num_steps_sampled: 1558000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1558,39424.6,1558000,-24.7,-20.5,-29.4,247


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1559000
  custom_metrics: {}
  date: 2021-10-22_06-43-20
  done: false
  episode_len_mean: 248.29
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.829000000000082
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 2
  episodes_total: 4743
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5801897752155437e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7212850623660617
          entropy_coeff: 0.009999999999999998
          kl: 0.032521134248783785
          policy_loss: -0.16616894023285972
          total_loss: 0.2177985082897875
          vf_explained_var: 0.7925113439559937
          vf_loss: 0.3911802993880378
    num_agent_steps_sampled: 1559000
    num_agent_steps_trained: 1559000
    num_steps_sampled: 1559000
    num_steps_trained: 15590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1559,39445.9,1559000,-24.829,-20.5,-34,248.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1560000
  custom_metrics: {}
  date: 2021-10-22_06-43-44
  done: false
  episode_len_mean: 251.07
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.107000000000088
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 3
  episodes_total: 4746
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.370284662823312e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8590769721402063
          entropy_coeff: 0.009999999999999998
          kl: 0.01131450951417921
          policy_loss: -0.11296201108230484
          total_loss: 0.45323711931705474
          vf_explained_var: 0.7485488653182983
          vf_loss: 0.5747899000843366
    num_agent_steps_sampled: 1560000
    num_agent_steps_trained: 1560000
    num_steps_sampled: 1560000
    num_steps_trained: 156000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1560,39469.7,1560000,-25.107,-20.5,-42.1,251.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1561000
  custom_metrics: {}
  date: 2021-10-22_06-44-11
  done: false
  episode_len_mean: 252.85
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.285000000000085
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4750
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.370284662823312e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7667687760459052
          entropy_coeff: 0.009999999999999998
          kl: 0.02144825280326567
          policy_loss: 0.15321825966238975
          total_loss: 0.5996018303765192
          vf_explained_var: 0.8856146335601807
          vf_loss: 0.4540512634648217
    num_agent_steps_sampled: 1561000
    num_agent_steps_trained: 1561000
    num_steps_sampled: 1561000
    num_steps_trained: 1561000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1561,39497.3,1561000,-25.285,-20.5,-42.1,252.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1562000
  custom_metrics: {}
  date: 2021-10-22_06-44-40
  done: false
  episode_len_mean: 252.18
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.218000000000085
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4754
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.05542699423497e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7010981493526035
          entropy_coeff: 0.009999999999999998
          kl: 0.01422901823576505
          policy_loss: -0.010718366089794372
          total_loss: 0.5341707554128435
          vf_explained_var: 0.805324912071228
          vf_loss: 0.5519001060061984
    num_agent_steps_sampled: 1562000
    num_agent_steps_trained: 1562000
    num_steps_sampled: 1562000
    num_steps_trained: 1562000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1562,39526.1,1562000,-25.218,-20.5,-42.1,252.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1563000
  custom_metrics: {}
  date: 2021-10-22_06-45-09
  done: false
  episode_len_mean: 251.7
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.170000000000087
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4758
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.05542699423497e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5812481840451559
          entropy_coeff: 0.009999999999999998
          kl: 0.0090396584785108
          policy_loss: -0.0004352061284912957
          total_loss: 0.6764571511083179
          vf_explained_var: 0.7467210292816162
          vf_loss: 0.6827048401037852
    num_agent_steps_sampled: 1563000
    num_agent_steps_trained: 1563000
    num_steps_sampled: 1563000
    num_steps_trained: 1563000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1563,39555,1563000,-25.17,-20.5,-42.1,251.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1564000
  custom_metrics: {}
  date: 2021-10-22_06-45-38
  done: false
  episode_len_mean: 251.48
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.14800000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4762
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.05542699423497e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5512214869260788
          entropy_coeff: 0.009999999999999998
          kl: 0.010361374236000465
          policy_loss: 0.052777032264404826
          total_loss: 0.7911880360709296
          vf_explained_var: 0.597318708896637
          vf_loss: 0.7439232150713603
    num_agent_steps_sampled: 1564000
    num_agent_steps_trained: 1564000
    num_steps_sampled: 1564000
    num_steps_trained: 1564000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1564,39584,1564000,-25.148,-20.5,-42.1,251.48


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1565000
  custom_metrics: {}
  date: 2021-10-22_06-46-08
  done: false
  episode_len_mean: 251.03
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.103000000000094
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4766
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.05542699423497e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6999852372540368
          entropy_coeff: 0.009999999999999998
          kl: 0.0781095723820886
          policy_loss: 0.016822959979375204
          total_loss: 0.7251915663480759
          vf_explained_var: 0.5962163805961609
          vf_loss: 0.7153684543238745
    num_agent_steps_sampled: 1565000
    num_agent_steps_trained: 1565000
    num_steps_sampled: 1565000
    num_steps_trained: 1565000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1565,39613.3,1565000,-25.103,-20.5,-42.1,251.03




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1566000
  custom_metrics: {}
  date: 2021-10-22_06-46-54
  done: false
  episode_len_mean: 250.66
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.066000000000084
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4770
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2083140491352459e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.705243546432919
          entropy_coeff: 0.009999999999999998
          kl: 0.05206543759565597
          policy_loss: 0.030759632256295945
          total_loss: 0.6312449948655234
          vf_explained_var: 0.7603371739387512
          vf_loss: 0.6075377980868022
    num_agent_steps_sampled: 1566000
    num_agent_steps_trained: 1566000
    num_steps_sampled: 1566000
    num_steps_trained: 1566000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1566,39659.4,1566000,-25.066,-20.5,-42.1,250.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1567000
  custom_metrics: {}
  date: 2021-10-22_06-47-24
  done: false
  episode_len_mean: 250.64
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.064000000000092
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4774
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8124710737028692e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5819988509019216
          entropy_coeff: 0.009999999999999998
          kl: 0.011822296850074724
          policy_loss: 0.03384793736040592
          total_loss: 0.8129243241416083
          vf_explained_var: 0.6140556931495667
          vf_loss: 0.7848963807026546
    num_agent_steps_sampled: 1567000
    num_agent_steps_trained: 1567000
    num_steps_sampled: 1567000
    num_steps_trained: 156700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1567,39689.9,1567000,-25.064,-20.5,-42.1,250.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1568000
  custom_metrics: {}
  date: 2021-10-22_06-47-54
  done: false
  episode_len_mean: 250.54
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.054000000000087
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4778
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8124710737028692e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6441925062073601
          entropy_coeff: 0.009999999999999998
          kl: 0.028011588403079834
          policy_loss: 0.035182320450743036
          total_loss: 0.8546598275502523
          vf_explained_var: 0.5272579789161682
          vf_loss: 0.8259194321102566
    num_agent_steps_sampled: 1568000
    num_agent_steps_trained: 1568000
    num_steps_sampled: 1568000
    num_steps_trained: 15680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1568,39720.2,1568000,-25.054,-20.5,-42.1,250.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1569000
  custom_metrics: {}
  date: 2021-10-22_06-48-25
  done: false
  episode_len_mean: 250.68
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.068000000000083
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4782
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7187066105543033e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.518064527048005
          entropy_coeff: 0.009999999999999998
          kl: 0.0034191226181150116
          policy_loss: -0.0599845743427674
          total_loss: 0.7932278441058265
          vf_explained_var: 0.4752312898635864
          vf_loss: 0.8583930552005767
    num_agent_steps_sampled: 1569000
    num_agent_steps_trained: 1569000
    num_steps_sampled: 1569000
    num_steps_trained: 156900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1569,39750.3,1569000,-25.068,-20.5,-42.1,250.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1570000
  custom_metrics: {}
  date: 2021-10-22_06-48-55
  done: false
  episode_len_mean: 250.39
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.039000000000087
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 5
  episodes_total: 4787
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3593533052771516e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5491179529163572
          entropy_coeff: 0.009999999999999998
          kl: 0.00890932966583446
          policy_loss: -0.03682421441707346
          total_loss: 0.8006836341487037
          vf_explained_var: 0.596278190612793
          vf_loss: 0.8429990271727245
    num_agent_steps_sampled: 1570000
    num_agent_steps_trained: 1570000
    num_steps_sampled: 1570000
    num_steps_trained: 1570000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1570,39780.9,1570000,-25.039,-20.5,-42.1,250.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1571000
  custom_metrics: {}
  date: 2021-10-22_06-49-25
  done: false
  episode_len_mean: 250.15
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.015000000000086
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4791
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3593533052771516e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5343853861093522
          entropy_coeff: 0.009999999999999998
          kl: 0.007309649004838863
          policy_loss: 0.020444852651821242
          total_loss: 0.874405136373308
          vf_explained_var: 0.4047172963619232
          vf_loss: 0.8593041406737434
    num_agent_steps_sampled: 1571000
    num_agent_steps_trained: 1571000
    num_steps_sampled: 1571000
    num_steps_trained: 157100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1571,39810.7,1571000,-25.015,-20.5,-42.1,250.15


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1572000
  custom_metrics: {}
  date: 2021-10-22_06-49-56
  done: false
  episode_len_mean: 249.97
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.99700000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4795
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3593533052771516e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6359814726644092
          entropy_coeff: 0.009999999999999998
          kl: 0.09669539271439215
          policy_loss: 0.018522849140895737
          total_loss: 0.6014635297987196
          vf_explained_var: 0.6185700297355652
          vf_loss: 0.5893004894256592
    num_agent_steps_sampled: 1572000
    num_agent_steps_trained: 1572000
    num_steps_sampled: 1572000
    num_steps_trained: 1572000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1572,39841.2,1572000,-24.997,-20.5,-42.1,249.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1573000
  custom_metrics: {}
  date: 2021-10-22_06-50-26
  done: false
  episode_len_mean: 249.81
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.98100000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4799
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0390299579157277e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6601689202917946
          entropy_coeff: 0.009999999999999998
          kl: 0.16130676108678807
          policy_loss: 0.0827708024945524
          total_loss: 0.6103342006603877
          vf_explained_var: 0.6615323424339294
          vf_loss: 0.5341650863488515
    num_agent_steps_sampled: 1573000
    num_agent_steps_trained: 1573000
    num_steps_sampled: 1573000
    num_steps_trained: 1573000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1573,39871.5,1573000,-24.981,-20.5,-42.1,249.81




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1574000
  custom_metrics: {}
  date: 2021-10-22_06-51-16
  done: false
  episode_len_mean: 249.49
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.949000000000083
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4803
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.05854493687359e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7615348286098904
          entropy_coeff: 0.009999999999999998
          kl: 0.16898189100373504
          policy_loss: -0.14909468682275878
          total_loss: 0.4235650398665004
          vf_explained_var: 0.7336369156837463
          vf_loss: 0.5802750693427192
    num_agent_steps_sampled: 1574000
    num_agent_steps_trained: 1574000
    num_steps_sampled: 1574000
    num_steps_trained: 1574000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1574,39921.1,1574000,-24.949,-20.5,-42.1,249.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1575000
  custom_metrics: {}
  date: 2021-10-22_06-51-44
  done: false
  episode_len_mean: 249.55
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.955000000000087
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 5
  episodes_total: 4808
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.587817405310386e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.8506229142347972
          entropy_coeff: 0.009999999999999998
          kl: 0.06355725236068616
          policy_loss: -0.05132510575155417
          total_loss: 0.23833959185414844
          vf_explained_var: 0.897922933101654
          vf_loss: 0.29817092501454884
    num_agent_steps_sampled: 1575000
    num_agent_steps_trained: 1575000
    num_steps_sampled: 1575000
    num_steps_trained: 157500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1575,39950.1,1575000,-24.955,-20.5,-42.1,249.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1576000
  custom_metrics: {}
  date: 2021-10-22_06-52-14
  done: false
  episode_len_mean: 250.06
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.006000000000085
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4812
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.881726107965581e-56
          cur_lr: 5.000000000000001e-05
          entropy: 1.048724024825626
          entropy_coeff: 0.009999999999999998
          kl: 0.08843318412160149
          policy_loss: -0.07139230216335919
          total_loss: 0.26974015475975144
          vf_explained_var: 0.8224981427192688
          vf_loss: 0.35161970290872785
    num_agent_steps_sampled: 1576000
    num_agent_steps_trained: 1576000
    num_steps_sampled: 1576000
    num_steps_trained: 157600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1576,39979.4,1576000,-25.006,-21.1,-42.1,250.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1577000
  custom_metrics: {}
  date: 2021-10-22_06-52-42
  done: false
  episode_len_mean: 250.56
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.056000000000086
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4816
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0322589161948366e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7864870170752207
          entropy_coeff: 0.009999999999999998
          kl: 0.031188325784685132
          policy_loss: -0.03264591942230861
          total_loss: 0.727905911869473
          vf_explained_var: 0.4773898720741272
          vf_loss: 0.7684167179796431
    num_agent_steps_sampled: 1577000
    num_agent_steps_trained: 1577000
    num_steps_sampled: 1577000
    num_steps_trained: 157700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1577,40008,1577000,-25.056,-21.1,-42.1,250.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1578000
  custom_metrics: {}
  date: 2021-10-22_06-53-12
  done: false
  episode_len_mean: 250.99
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.099000000000093
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4820
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5483883742922552e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8283402091926999
          entropy_coeff: 0.009999999999999998
          kl: 0.04305063630949206
          policy_loss: 0.011584475636482239
          total_loss: 0.7246923830774096
          vf_explained_var: 0.25833860039711
          vf_loss: 0.721391314930386
    num_agent_steps_sampled: 1578000
    num_agent_steps_trained: 1578000
    num_steps_sampled: 1578000
    num_steps_trained: 1578000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1578,40037.3,1578000,-25.099,-21.1,-42.1,250.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1579000
  custom_metrics: {}
  date: 2021-10-22_06-53-40
  done: false
  episode_len_mean: 251.44
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.14400000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4824
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3225825614383837e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7871734393967522
          entropy_coeff: 0.009999999999999998
          kl: 0.01986348739730709
          policy_loss: 0.009552400641971165
          total_loss: 0.9694428000185225
          vf_explained_var: 0.1376868188381195
          vf_loss: 0.9677621510293749
    num_agent_steps_sampled: 1579000
    num_agent_steps_trained: 1579000
    num_steps_sampled: 1579000
    num_steps_trained: 1579000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1579,40065.9,1579000,-25.144,-21.1,-42.1,251.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1580000
  custom_metrics: {}
  date: 2021-10-22_06-54-11
  done: false
  episode_len_mean: 251.75
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.17500000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4828
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3225825614383837e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.7062422302034166
          entropy_coeff: 0.009999999999999998
          kl: 0.005016611822847257
          policy_loss: 0.0022551532420847153
          total_loss: 0.8778016845385234
          vf_explained_var: 0.3500436544418335
          vf_loss: 0.8826089653703901
    num_agent_steps_sampled: 1580000
    num_agent_steps_trained: 1580000
    num_steps_sampled: 1580000
    num_steps_trained: 15800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1580,40096,1580000,-25.175,-21.1,-42.1,251.75




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1581000
  custom_metrics: {}
  date: 2021-10-22_06-54-58
  done: false
  episode_len_mean: 251.77
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.177000000000092
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4832
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3225825614383837e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.756821029053794
          entropy_coeff: 0.009999999999999998
          kl: 0.027184745290410067
          policy_loss: -0.0016447227034303877
          total_loss: 0.8885342505243089
          vf_explained_var: 0.5029512047767639
          vf_loss: 0.8977471861574385
    num_agent_steps_sampled: 1581000
    num_agent_steps_trained: 1581000
    num_steps_sampled: 1581000
    num_steps_trained: 1581

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1581,40143,1581000,-25.177,-21.1,-42.1,251.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1582000
  custom_metrics: {}
  date: 2021-10-22_06-55-27
  done: false
  episode_len_mean: 251.92
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.19200000000009
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4836
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4838738421575745e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.770204707649019
          entropy_coeff: 0.009999999999999998
          kl: 0.0056112597067076485
          policy_loss: 0.02060988230837716
          total_loss: 1.0428072810173035
          vf_explained_var: 0.44157904386520386
          vf_loss: 1.0298994468318092
    num_agent_steps_sampled: 1582000
    num_agent_steps_trained: 1582000
    num_steps_sampled: 1582000
    num_steps_trained: 158200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1582,40171.9,1582000,-25.192,-21.1,-42.1,251.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1583000
  custom_metrics: {}
  date: 2021-10-22_06-55-55
  done: false
  episode_len_mean: 252.6
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.260000000000094
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 4
  episodes_total: 4840
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4838738421575745e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.75003070169025
          entropy_coeff: 0.009999999999999998
          kl: 0.025759737016693093
          policy_loss: -0.01027055862877104
          total_loss: 0.9343175854947832
          vf_explained_var: 0.5767331123352051
          vf_loss: 0.9520884427759383
    num_agent_steps_sampled: 1583000
    num_agent_steps_trained: 1583000
    num_steps_sampled: 1583000
    num_steps_trained: 1583000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1583,40200.7,1583000,-25.26,-21.1,-42.1,252.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1584000
  custom_metrics: {}
  date: 2021-10-22_06-56-24
  done: false
  episode_len_mean: 251.41
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -25.141000000000094
  episode_reward_min: -42.10000000000033
  episodes_this_iter: 3
  episodes_total: 4843
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.225810763236362e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.5825854745176103
          entropy_coeff: 0.009999999999999998
          kl: 0.009307476259734276
          policy_loss: -0.10241595622566012
          total_loss: 0.7828382356299295
          vf_explained_var: 0.5993285179138184
          vf_loss: 0.8910800357659657
    num_agent_steps_sampled: 1584000
    num_agent_steps_trained: 1584000
    num_steps_sampled: 1584000
    num_steps_trained: 158400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1584,40229.9,1584000,-25.141,-21.1,-42.1,251.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1585000
  custom_metrics: {}
  date: 2021-10-22_06-56-54
  done: false
  episode_len_mean: 246.5
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.65000000000008
  episode_reward_min: -30.400000000000162
  episodes_this_iter: 5
  episodes_total: 4848
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.225810763236362e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.5254145592451096
          entropy_coeff: 0.009999999999999998
          kl: 0.02915153852207924
          policy_loss: -0.00956611724363433
          total_loss: 1.1373798780971103
          vf_explained_var: 0.5090958476066589
          vf_loss: 1.1522001425425212
    num_agent_steps_sampled: 1585000
    num_agent_steps_trained: 1585000
    num_steps_sampled: 1585000
    num_steps_trained: 1585000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1585,40259.5,1585000,-24.65,-21.1,-30.4,246.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1586000
  custom_metrics: {}
  date: 2021-10-22_06-57-25
  done: false
  episode_len_mean: 245.85
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.58500000000008
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4852
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.838716144854545e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.5171255403094821
          entropy_coeff: 0.009999999999999998
          kl: 0.021825737422932513
          policy_loss: 0.009431241038772796
          total_loss: 0.877615753809611
          vf_explained_var: 0.5091109871864319
          vf_loss: 0.8733557793829176
    num_agent_steps_sampled: 1586000
    num_agent_steps_trained: 1586000
    num_steps_sampled: 1586000
    num_steps_trained: 1586000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1586,40290.7,1586000,-24.585,-21.1,-27.4,245.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1587000
  custom_metrics: {}
  date: 2021-10-22_06-57-55
  done: false
  episode_len_mean: 245.45
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.545000000000076
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4856
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1758074217281817e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5152381307548947
          entropy_coeff: 0.009999999999999998
          kl: 0.01508203075990069
          policy_loss: 0.036750591380728614
          total_loss: 0.9264051814874014
          vf_explained_var: 0.5394841432571411
          vf_loss: 0.8948069738017188
    num_agent_steps_sampled: 1587000
    num_agent_steps_trained: 1587000
    num_steps_sampled: 1587000
    num_steps_trained: 158700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1587,40320.2,1587000,-24.545,-21.1,-27.4,245.45




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1588000
  custom_metrics: {}
  date: 2021-10-22_06-58-42
  done: false
  episode_len_mean: 244.8
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.48000000000008
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4860
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1758074217281817e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.41008950140741135
          entropy_coeff: 0.009999999999999998
          kl: 0.00732702558303523
          policy_loss: 0.03978374641802576
          total_loss: 0.833210600581434
          vf_explained_var: 0.5203607678413391
          vf_loss: 0.7975277450349596
    num_agent_steps_sampled: 1588000
    num_agent_steps_trained: 1588000
    num_steps_sampled: 1588000
    num_steps_trained: 1588000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1588,40367.2,1588000,-24.48,-21.1,-27.4,244.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1589000
  custom_metrics: {}
  date: 2021-10-22_06-59-14
  done: false
  episode_len_mean: 244.66
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.46600000000008
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4864
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1758074217281817e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.40482567614979215
          entropy_coeff: 0.009999999999999998
          kl: 0.0015932662130736
          policy_loss: 0.021365992228190103
          total_loss: 1.158637120988634
          vf_explained_var: 0.3114190995693207
          vf_loss: 1.1413193729188706
    num_agent_steps_sampled: 1589000
    num_agent_steps_trained: 1589000
    num_steps_sampled: 1589000
    num_steps_trained: 1589000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1589,40399.1,1589000,-24.466,-21.1,-27.4,244.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1590000
  custom_metrics: {}
  date: 2021-10-22_06-59-44
  done: false
  episode_len_mean: 244.38
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.43800000000008
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4868
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.8790371086409086e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.40098885628912184
          entropy_coeff: 0.009999999999999998
          kl: 0.001049643912320164
          policy_loss: -0.06406609970662329
          total_loss: 1.0930415325694613
          vf_explained_var: 0.2780942916870117
          vf_loss: 1.161117516623603
    num_agent_steps_sampled: 1590000
    num_agent_steps_trained: 1590000
    num_steps_sampled: 1590000
    num_steps_trained: 159000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1590,40429,1590000,-24.438,-21.1,-27.4,244.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1591000
  custom_metrics: {}
  date: 2021-10-22_07-00-14
  done: false
  episode_len_mean: 244.29
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.429000000000073
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4872
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9395185543204543e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.400622738732232
          entropy_coeff: 0.009999999999999998
          kl: 0.0024674579561526065
          policy_loss: -0.06921577511562241
          total_loss: 1.246763042608897
          vf_explained_var: 0.2867603003978729
          vf_loss: 1.3199850347306994
    num_agent_steps_sampled: 1591000
    num_agent_steps_trained: 1591000
    num_steps_sampled: 1591000
    num_steps_trained: 159100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1591,40458.9,1591000,-24.429,-21.1,-27.4,244.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1592000
  custom_metrics: {}
  date: 2021-10-22_07-00-43
  done: false
  episode_len_mean: 243.95
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.395000000000074
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 5
  episodes_total: 4877
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4697592771602272e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.39832986692587535
          entropy_coeff: 0.009999999999999998
          kl: 0.004205001284112579
          policy_loss: -0.017242674860689373
          total_loss: 1.294266539812088
          vf_explained_var: 0.406629353761673
          vf_loss: 1.3154925180806054
    num_agent_steps_sampled: 1592000
    num_agent_steps_trained: 1592000
    num_steps_sampled: 1592000
    num_steps_trained: 15920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1592,40488.6,1592000,-24.395,-21.1,-27.4,243.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1593000
  custom_metrics: {}
  date: 2021-10-22_07-01-13
  done: false
  episode_len_mean: 244.04
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.404000000000075
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4881
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.348796385801136e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.4076840612623427
          entropy_coeff: 0.009999999999999998
          kl: 0.003378640506260745
          policy_loss: 0.007696277399857839
          total_loss: 1.029603319035636
          vf_explained_var: 0.3746582865715027
          vf_loss: 1.0259838965204027
    num_agent_steps_sampled: 1593000
    num_agent_steps_trained: 1593000
    num_steps_sampled: 1593000
    num_steps_trained: 1593000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1593,40518.6,1593000,-24.404,-21.1,-27.4,244.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1594000
  custom_metrics: {}
  date: 2021-10-22_07-01-44
  done: false
  episode_len_mean: 244.03
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.403000000000073
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4885
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.674398192900568e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5006965100765228
          entropy_coeff: 0.009999999999999998
          kl: 0.027475635924462772
          policy_loss: -0.004690567238463296
          total_loss: 0.9857767754130893
          vf_explained_var: 0.4093444049358368
          vf_loss: 0.9954743093914455
    num_agent_steps_sampled: 1594000
    num_agent_steps_trained: 1594000
    num_steps_sampled: 1594000
    num_steps_trained: 15940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1594,40548.9,1594000,-24.403,-21.1,-27.4,244.03


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1595000
  custom_metrics: {}
  date: 2021-10-22_07-02-13
  done: false
  episode_len_mean: 244.18
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.418000000000077
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4889
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5115972893508505e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.36900521616141
          entropy_coeff: 0.009999999999999998
          kl: 0.0277096726247401
          policy_loss: 0.0409032235129012
          total_loss: 0.9825818452570173
          vf_explained_var: 0.401885986328125
          vf_loss: 0.9453686667813195
    num_agent_steps_sampled: 1595000
    num_agent_steps_trained: 1595000
    num_steps_sampled: 1595000
    num_steps_trained: 1595000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1595,40578.6,1595000,-24.418,-21.1,-27.4,244.18




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1596000
  custom_metrics: {}
  date: 2021-10-22_07-03-02
  done: false
  episode_len_mean: 244.02
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.402000000000076
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4893
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.267395934026274e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.20099729109141562
          entropy_coeff: 0.009999999999999998
          kl: 0.010560465104245667
          policy_loss: -0.00550750560230679
          total_loss: 0.7608785774972704
          vf_explained_var: 0.5267046689987183
          vf_loss: 0.7683960616588592
    num_agent_steps_sampled: 1596000
    num_agent_steps_trained: 1596000
    num_steps_sampled: 1596000
    num_steps_trained: 15960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1596,40627.6,1596000,-24.402,-21.1,-27.4,244.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1597000
  custom_metrics: {}
  date: 2021-10-22_07-03-32
  done: false
  episode_len_mean: 244.09
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -24.409000000000074
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4897
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.267395934026274e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.2315735643108686
          entropy_coeff: 0.009999999999999998
          kl: 0.005211862854640496
          policy_loss: -0.0977478520737754
          total_loss: 0.9173395070764754
          vf_explained_var: 0.39528322219848633
          vf_loss: 1.0174030853642357
    num_agent_steps_sampled: 1597000
    num_agent_steps_trained: 1597000
    num_steps_sampled: 1597000
    num_steps_trained: 159700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1597,40657.5,1597000,-24.409,-21.1,-27.4,244.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1598000
  custom_metrics: {}
  date: 2021-10-22_07-04-02
  done: false
  episode_len_mean: 244.45
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.44500000000007
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 5
  episodes_total: 4902
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.267395934026274e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.28941926889949376
          entropy_coeff: 0.009999999999999998
          kl: 0.059337888663881934
          policy_loss: -0.03557558945483631
          total_loss: 0.9839872479438782
          vf_explained_var: 0.4772110879421234
          vf_loss: 1.022457034058041
    num_agent_steps_sampled: 1598000
    num_agent_steps_trained: 1598000
    num_steps_sampled: 1598000
    num_steps_trained: 159800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1598,40687.2,1598000,-24.445,-21.5,-27.4,244.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1599000
  custom_metrics: {}
  date: 2021-10-22_07-04-32
  done: false
  episode_len_mean: 244.42
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.442000000000075
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4906
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2401093901039415e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.15973049990004964
          entropy_coeff: 0.009999999999999998
          kl: 0.0032487959776721217
          policy_loss: 0.03194243357413345
          total_loss: 0.8188223540782928
          vf_explained_var: 0.4338153898715973
          vf_loss: 0.7884772274229261
    num_agent_steps_sampled: 1599000
    num_agent_steps_trained: 1599000
    num_steps_sampled: 1599000
    num_steps_trained: 159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1599,40716.6,1599000,-24.442,-21.5,-27.4,244.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1600000
  custom_metrics: {}
  date: 2021-10-22_07-05-00
  done: false
  episode_len_mean: 244.75
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.475000000000072
  episode_reward_min: -27.40000000000012
  episodes_this_iter: 4
  episodes_total: 4910
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.200546950519708e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.18182719912793902
          entropy_coeff: 0.009999999999999998
          kl: 0.009435755778783398
          policy_loss: 0.01473038751218054
          total_loss: 0.9492543253633711
          vf_explained_var: 0.35285818576812744
          vf_loss: 0.9363422109021081
    num_agent_steps_sampled: 1600000
    num_agent_steps_trained: 1600000
    num_steps_sampled: 1600000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1600,40745.5,1600000,-24.475,-21.5,-27.4,244.75


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1601000
  custom_metrics: {}
  date: 2021-10-22_07-05-31
  done: false
  episode_len_mean: 244.38
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.438000000000084
  episode_reward_min: -27.300000000000118
  episodes_this_iter: 4
  episodes_total: 4914
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.200546950519708e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.14390765296088326
          entropy_coeff: 0.009999999999999998
          kl: 0.003665074242131647
          policy_loss: 0.02770287444194158
          total_loss: 0.9442526618639628
          vf_explained_var: 0.3122509717941284
          vf_loss: 0.9179888572957781
    num_agent_steps_sampled: 1601000
    num_agent_steps_trained: 1601000
    num_steps_sampled: 1601000
    num_steps_trained: 1601

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1601,40776.4,1601000,-24.438,-21.5,-27.3,244.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1602000
  custom_metrics: {}
  date: 2021-10-22_07-06-02
  done: false
  episode_len_mean: 244.13
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -24.41300000000008
  episode_reward_min: -27.300000000000118
  episodes_this_iter: 4
  episodes_total: 4918
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.100273475259854e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.19321772108475369
          entropy_coeff: 0.009999999999999998
          kl: 0.014827333341430441
          policy_loss: 0.042501231614086366
          total_loss: 0.9958318074544271
          vf_explained_var: 0.3439457416534424
          vf_loss: 0.9552627470758226
    num_agent_steps_sampled: 1602000
    num_agent_steps_trained: 1602000
    num_steps_sampled: 1602000
    num_steps_trained: 1602

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1602,40807,1602000,-24.413,-21.5,-27.3,244.13




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1603000
  custom_metrics: {}
  date: 2021-10-22_07-06-50
  done: false
  episode_len_mean: 243.93
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.393000000000075
  episode_reward_min: -27.300000000000118
  episodes_this_iter: 4
  episodes_total: 4922
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.100273475259854e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.18347423573335012
          entropy_coeff: 0.009999999999999998
          kl: 0.0026319956540684568
          policy_loss: 0.021200883636871972
          total_loss: 1.1490006380610995
          vf_explained_var: 0.2871173322200775
          vf_loss: 1.129634494913949
    num_agent_steps_sampled: 1603000
    num_agent_steps_trained: 1603000
    num_steps_sampled: 1603000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1603,40855.2,1603000,-24.393,-21.4,-27.3,243.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1604000
  custom_metrics: {}
  date: 2021-10-22_07-07-21
  done: false
  episode_len_mean: 243.64
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.36400000000007
  episode_reward_min: -27.300000000000118
  episodes_this_iter: 4
  episodes_total: 4926
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.550136737629927e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.13259199783205985
          entropy_coeff: 0.009999999999999998
          kl: 0.003701443634628395
          policy_loss: 0.026467959582805633
          total_loss: 1.1320069074630736
          vf_explained_var: 0.24303176999092102
          vf_loss: 1.1068648788664075
    num_agent_steps_sampled: 1604000
    num_agent_steps_trained: 1604000
    num_steps_sampled: 1604000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1604,40885.6,1604000,-24.364,-21.4,-27.3,243.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1605000
  custom_metrics: {}
  date: 2021-10-22_07-07-50
  done: false
  episode_len_mean: 243.64
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.36400000000008
  episode_reward_min: -27.300000000000118
  episodes_this_iter: 4
  episodes_total: 4930
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.750683688149634e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.1239765075345834
          entropy_coeff: 0.009999999999999998
          kl: 0.0008431250395005918
          policy_loss: -0.026092125930719905
          total_loss: 1.1231003297699822
          vf_explained_var: 0.20778439939022064
          vf_loss: 1.1504322250684103
    num_agent_steps_sampled: 1605000
    num_agent_steps_trained: 1605000
    num_steps_sampled: 1605000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1605,40914.7,1605000,-24.364,-21.4,-27.3,243.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1606000
  custom_metrics: {}
  date: 2021-10-22_07-08-18
  done: false
  episode_len_mean: 244.21
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.42100000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4934
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875341844074817e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.12397344377305773
          entropy_coeff: 0.009999999999999998
          kl: 0.00888398423225346
          policy_loss: 0.015158272037903467
          total_loss: 1.2198970370822482
          vf_explained_var: 0.24807262420654297
          vf_loss: 1.2059785101148817
    num_agent_steps_sampled: 1606000
    num_agent_steps_trained: 1606000
    num_steps_sampled: 1606000
    num_steps_trained: 1606

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1606,40942.9,1606000,-24.421,-21.4,-29.9,244.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1607000
  custom_metrics: {}
  date: 2021-10-22_07-08-45
  done: false
  episode_len_mean: 244.43
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.443000000000076
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4938
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875341844074817e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.09862561540471183
          entropy_coeff: 0.009999999999999998
          kl: 0.0015686175387584298
          policy_loss: 0.010140116719735994
          total_loss: 1.2300993164380392
          vf_explained_var: 0.26677364110946655
          vf_loss: 1.220945456292894
    num_agent_steps_sampled: 1607000
    num_agent_steps_trained: 1607000
    num_steps_sampled: 1607000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1607,40969.4,1607000,-24.443,-21.4,-29.9,244.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1608000
  custom_metrics: {}
  date: 2021-10-22_07-09-15
  done: false
  episode_len_mean: 244.3
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.430000000000078
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4942
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9376709220374086e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.11563562204440435
          entropy_coeff: 0.009999999999999998
          kl: 0.0035572112054858785
          policy_loss: 0.01004320565197203
          total_loss: 1.1685162425041198
          vf_explained_var: 0.2870684862136841
          vf_loss: 1.159629389974806
    num_agent_steps_sampled: 1608000
    num_agent_steps_trained: 1608000
    num_steps_sampled: 1608000
    num_steps_trained: 1608

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1608,40999.5,1608000,-24.43,-21.4,-29.9,244.3


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1609000
  custom_metrics: {}
  date: 2021-10-22_07-09-43
  done: false
  episode_len_mean: 244.4
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.440000000000072
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4946
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.688354610187043e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0754447025557359
          entropy_coeff: 0.009999999999999998
          kl: 0.003328708392337908
          policy_loss: 0.02942402238647143
          total_loss: 1.1894530759917366
          vf_explained_var: 0.218087300658226
          vf_loss: 1.160783510737949
    num_agent_steps_sampled: 1609000
    num_agent_steps_trained: 1609000
    num_steps_sampled: 1609000
    num_steps_trained: 1609000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1609,41028.2,1609000,-24.44,-21.4,-29.9,244.4




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1610000
  custom_metrics: {}
  date: 2021-10-22_07-10-31
  done: false
  episode_len_mean: 244.41
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.441000000000077
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4950
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.8441773050935215e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.22310296346743902
          entropy_coeff: 0.009999999999999998
          kl: 0.03557868792212338
          policy_loss: 0.03588489592075348
          total_loss: 1.0737908164660135
          vf_explained_var: 0.21230696141719818
          vf_loss: 1.0401369591554006
    num_agent_steps_sampled: 1610000
    num_agent_steps_trained: 1610000
    num_steps_sampled: 1610000
    num_steps_trained: 161

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1610,41075.5,1610000,-24.441,-21.4,-29.9,244.41


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1611000
  custom_metrics: {}
  date: 2021-10-22_07-10-59
  done: false
  episode_len_mean: 245.36
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.536000000000076
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4954
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.266265957640284e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.1463229801919725
          entropy_coeff: 0.009999999999999998
          kl: 0.05750129780212174
          policy_loss: 0.06105029450522529
          total_loss: 1.158495118551784
          vf_explained_var: 0.12313925474882126
          vf_loss: 1.0989080518484116
    num_agent_steps_sampled: 1611000
    num_agent_steps_trained: 1611000
    num_steps_sampled: 1611000
    num_steps_trained: 161100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1611,41103.4,1611000,-24.536,-21.4,-29.9,245.36


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1612000
  custom_metrics: {}
  date: 2021-10-22_07-11-27
  done: false
  episode_len_mean: 245.84
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.58400000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 3
  episodes_total: 4957
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0899398936460425e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.17602044492959976
          entropy_coeff: 0.009999999999999998
          kl: 0.02894857607112316
          policy_loss: -0.10269268784258101
          total_loss: 1.3531901982095507
          vf_explained_var: 0.06963062286376953
          vf_loss: 1.4576430956522624
    num_agent_steps_sampled: 1612000
    num_agent_steps_trained: 1612000
    num_steps_sampled: 1612000
    num_steps_trained: 161

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1612,41131.8,1612000,-24.584,-21.4,-29.9,245.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1613000
  custom_metrics: {}
  date: 2021-10-22_07-11-57
  done: false
  episode_len_mean: 246.29
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.62900000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 5
  episodes_total: 4962
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6349098404690638e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.16519413772556518
          entropy_coeff: 0.009999999999999998
          kl: 0.017459657386244724
          policy_loss: 0.002595955216222339
          total_loss: 1.2894071062405905
          vf_explained_var: 0.30097249150276184
          vf_loss: 1.2884630839029947
    num_agent_steps_sampled: 1613000
    num_agent_steps_trained: 1613000
    num_steps_sampled: 1613000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1613,41161.4,1613000,-24.629,-21.4,-29.9,246.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1614000
  custom_metrics: {}
  date: 2021-10-22_07-12-24
  done: false
  episode_len_mean: 246.94
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.694000000000077
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 3
  episodes_total: 4965
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6349098404690638e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.4975641212529606
          entropy_coeff: 0.009999999999999998
          kl: 0.08086263048146773
          policy_loss: -0.07908476351035966
          total_loss: 0.890057177013821
          vf_explained_var: 0.5398513078689575
          vf_loss: 0.9741175721089045
    num_agent_steps_sampled: 1614000
    num_agent_steps_trained: 1614000
    num_steps_sampled: 1614000
    num_steps_trained: 16140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1614,41189,1614000,-24.694,-21.4,-29.9,246.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1615000
  custom_metrics: {}
  date: 2021-10-22_07-12-54
  done: false
  episode_len_mean: 247.26
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.72600000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4969
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.452364760703596e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.19228439446952608
          entropy_coeff: 0.009999999999999998
          kl: 0.007505412428054114
          policy_loss: -0.04639849555161264
          total_loss: 1.079083991712994
          vf_explained_var: 0.4761918783187866
          vf_loss: 1.1274053394794463
    num_agent_steps_sampled: 1615000
    num_agent_steps_trained: 1615000
    num_steps_sampled: 1615000
    num_steps_trained: 16150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1615,41218.7,1615000,-24.726,-21.4,-29.9,247.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1616000
  custom_metrics: {}
  date: 2021-10-22_07-13-23
  done: false
  episode_len_mean: 247.49
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.749000000000084
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4973
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.452364760703596e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.2134897506899304
          entropy_coeff: 0.009999999999999998
          kl: 0.0019009408030334562
          policy_loss: -0.10001434551344977
          total_loss: 1.5813660422960918
          vf_explained_var: 0.23409005999565125
          vf_loss: 1.6835152904192607
    num_agent_steps_sampled: 1616000
    num_agent_steps_trained: 1616000
    num_steps_sampled: 1616000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1616,41247.4,1616000,-24.749,-21.4,-29.9,247.49


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1617000
  custom_metrics: {}
  date: 2021-10-22_07-13-52
  done: false
  episode_len_mean: 247.83
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.783000000000083
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4977
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.226182380351798e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.22371561080217361
          entropy_coeff: 0.009999999999999998
          kl: 0.0036120661972811755
          policy_loss: -0.1262658345202605
          total_loss: 1.1195496572388544
          vf_explained_var: 0.46029573678970337
          vf_loss: 1.2480526526769002
    num_agent_steps_sampled: 1617000
    num_agent_steps_trained: 1617000
    num_steps_sampled: 1617000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1617,41276.9,1617000,-24.783,-21.4,-29.9,247.83




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1618000
  custom_metrics: {}
  date: 2021-10-22_07-14-38
  done: false
  episode_len_mean: 247.96
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.796000000000085
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 5
  episodes_total: 4982
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.13091190175899e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.25413862781392205
          entropy_coeff: 0.009999999999999998
          kl: 0.017458223680441008
          policy_loss: -0.030263068113062116
          total_loss: 1.0781643125745985
          vf_explained_var: 0.5721275806427002
          vf_loss: 1.110968769258923
    num_agent_steps_sampled: 1618000
    num_agent_steps_trained: 1618000
    num_steps_sampled: 1618000
    num_steps_trained: 1618

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1618,41322.7,1618000,-24.796,-21.4,-29.9,247.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1619000
  custom_metrics: {}
  date: 2021-10-22_07-15-07
  done: false
  episode_len_mean: 248.25
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.82500000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4986
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.13091190175899e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.16440906855795118
          entropy_coeff: 0.009999999999999998
          kl: 0.0030157523198028
          policy_loss: -0.008196873548958037
          total_loss: 0.8896798259682126
          vf_explained_var: 0.5406891703605652
          vf_loss: 0.8995207879278395
    num_agent_steps_sampled: 1619000
    num_agent_steps_trained: 1619000
    num_steps_sampled: 1619000
    num_steps_trained: 161900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1619,41351.7,1619000,-24.825,-21.4,-29.9,248.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1620000
  custom_metrics: {}
  date: 2021-10-22_07-15-37
  done: false
  episode_len_mean: 248.77
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.877000000000084
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4990
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.065455950879495e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.2940682778755824
          entropy_coeff: 0.009999999999999998
          kl: 0.05970425633086502
          policy_loss: -0.02596833606561025
          total_loss: 0.9125664393107097
          vf_explained_var: 0.4844852387905121
          vf_loss: 0.9414754602644179
    num_agent_steps_sampled: 1620000
    num_agent_steps_trained: 1620000
    num_steps_sampled: 1620000
    num_steps_trained: 16200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1620,41381.2,1620000,-24.877,-21.4,-29.9,248.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1621000
  custom_metrics: {}
  date: 2021-10-22_07-16-05
  done: false
  episode_len_mean: 249.0
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.900000000000087
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4994
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5981839263192424e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.18638876461320453
          entropy_coeff: 0.009999999999999998
          kl: 0.00232779623607813
          policy_loss: 0.004409539285633299
          total_loss: 0.9423939983050028
          vf_explained_var: 0.3770424425601959
          vf_loss: 0.939848334259457
    num_agent_steps_sampled: 1621000
    num_agent_steps_trained: 1621000
    num_steps_sampled: 1621000
    num_steps_trained: 16210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1621,41410,1621000,-24.9,-21.4,-29.9,249


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1622000
  custom_metrics: {}
  date: 2021-10-22_07-16-35
  done: false
  episode_len_mean: 249.24
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.92400000000009
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 4998
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2990919631596212e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.3855121804608239
          entropy_coeff: 0.009999999999999998
          kl: 0.053613661381518164
          policy_loss: 0.020842490221063294
          total_loss: 1.0187787532806396
          vf_explained_var: 0.3199695646762848
          vf_loss: 1.0017913864718544
    num_agent_steps_sampled: 1622000
    num_agent_steps_trained: 1622000
    num_steps_sampled: 1622000
    num_steps_trained: 1622

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1622,41439.6,1622000,-24.924,-21.4,-29.9,249.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1623000
  custom_metrics: {}
  date: 2021-10-22_07-17-04
  done: false
  episode_len_mean: 249.7
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -24.97000000000008
  episode_reward_min: -29.900000000000155
  episodes_this_iter: 4
  episodes_total: 5002
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.448637944739431e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.6286719573868645
          entropy_coeff: 0.009999999999999998
          kl: 0.09297160087655938
          policy_loss: 0.048598101072841224
          total_loss: 0.9156528863641951
          vf_explained_var: 0.43014276027679443
          vf_loss: 0.8733414947986603
    num_agent_steps_sampled: 1623000
    num_agent_steps_trained: 1623000
    num_steps_sampled: 1623000
    num_steps_trained: 162300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1623,41468.2,1623000,-24.97,-21.4,-29.9,249.7


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1624000
  custom_metrics: {}
  date: 2021-10-22_07-17-19
  done: false
  episode_len_mean: 252.83
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -25.283000000000087
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 1
  episodes_total: 5003
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1729569171091484e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.28969009071588514
          entropy_coeff: 0.009999999999999998
          kl: 0.056693928792902665
          policy_loss: -0.0051243797772460515
          total_loss: 0.6205495975083775
          vf_explained_var: 0.15517430007457733
          vf_loss: 0.6285708830588393
    num_agent_steps_sampled: 1624000
    num_agent_steps_trained: 1624000
    num_steps_sampled: 1624000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1624,41483.3,1624000,-25.283,-21.4,-55.4,252.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1625000
  custom_metrics: {}
  date: 2021-10-22_07-17-34
  done: false
  episode_len_mean: 258.14
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -25.814000000000096
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 2
  episodes_total: 5005
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.759435375663718e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.310108473069138
          entropy_coeff: 0.009999999999999998
          kl: 0.0047532609687604855
          policy_loss: -0.05561300449901157
          total_loss: 0.9272043897045983
          vf_explained_var: 0.03972119092941284
          vf_loss: 0.9859184738662508
    num_agent_steps_sampled: 1625000
    num_agent_steps_trained: 1625000
    num_steps_sampled: 1625000
    num_steps_trained: 1625

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1625,41498.8,1625000,-25.814,-21.4,-55.4,258.14


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1626000
  custom_metrics: {}
  date: 2021-10-22_07-17-52
  done: false
  episode_len_mean: 264.8
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.4800000000001
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5008
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.879717687831859e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.2696182891726494
          entropy_coeff: 0.009999999999999998
          kl: 0.0047545867119623605
          policy_loss: 0.046407674252986905
          total_loss: 1.1477074550257789
          vf_explained_var: -0.07030744850635529
          vf_loss: 1.1039959715472327
    num_agent_steps_sampled: 1626000
    num_agent_steps_trained: 1626000
    num_steps_sampled: 1626000
    num_steps_trained: 16260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1626,41516.2,1626000,-26.48,-21.4,-57.9,264.8




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1627000
  custom_metrics: {}
  date: 2021-10-22_07-18-26
  done: false
  episode_len_mean: 268.28
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -26.828000000000106
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 2
  episodes_total: 5010
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9398588439159295e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.3792362795935737
          entropy_coeff: 0.009999999999999998
          kl: 0.0031797045292929877
          policy_loss: 0.06858712285757065
          total_loss: 0.6732393986648983
          vf_explained_var: -0.4490259289741516
          vf_loss: 0.6084446366462443
    num_agent_steps_sampled: 1627000
    num_agent_steps_trained: 1627000
    num_steps_sampled: 1627000
    num_steps_trained: 162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1627,41550.6,1627000,-26.828,-21.4,-57.9,268.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1628000
  custom_metrics: {}
  date: 2021-10-22_07-18-48
  done: false
  episode_len_mean: 271.94
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.19400000000012
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5013
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.699294219579647e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.4231098963154687
          entropy_coeff: 0.009999999999999998
          kl: 0.004570242363737344
          policy_loss: 0.05072770565748215
          total_loss: 1.1875010258621639
          vf_explained_var: 0.038276176899671555
          vf_loss: 1.1410044305854374
    num_agent_steps_sampled: 1628000
    num_agent_steps_trained: 1628000
    num_steps_sampled: 1628000
    num_steps_trained: 16280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1628,41572.5,1628000,-27.194,-21.4,-57.9,271.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1629000
  custom_metrics: {}
  date: 2021-10-22_07-19-10
  done: false
  episode_len_mean: 275.76
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.57600000000012
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5016
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.849647109789824e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.40167831579844154
          entropy_coeff: 0.009999999999999998
          kl: 0.0029285877811506427
          policy_loss: 0.03372804464565383
          total_loss: 1.2276205937067668
          vf_explained_var: 0.06997302919626236
          vf_loss: 1.1979093293348948
    num_agent_steps_sampled: 1629000
    num_agent_steps_trained: 1629000
    num_steps_sampled: 1629000
    num_steps_trained: 1629

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1629,41594.3,1629000,-27.576,-21.4,-57.9,275.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1630000
  custom_metrics: {}
  date: 2021-10-22_07-19-34
  done: false
  episode_len_mean: 277.82
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.78200000000012
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5019
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424823554894912e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.4277254856295056
          entropy_coeff: 0.009999999999999998
          kl: 0.016471466556394098
          policy_loss: 0.06554868237839805
          total_loss: 1.2097964919275708
          vf_explained_var: 0.0837247297167778
          vf_loss: 1.1485250800848008
    num_agent_steps_sampled: 1630000
    num_agent_steps_trained: 1630000
    num_steps_sampled: 1630000
    num_steps_trained: 1630000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1630,41618,1630000,-27.782,-21.4,-57.9,277.82


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1631000
  custom_metrics: {}
  date: 2021-10-22_07-20-00
  done: false
  episode_len_mean: 278.54
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.854000000000127
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5022
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424823554894912e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.5094576752848096
          entropy_coeff: 0.009999999999999998
          kl: 0.009248160904944945
          policy_loss: -0.08985361920462714
          total_loss: 1.5608418703079223
          vf_explained_var: 0.09383490681648254
          vf_loss: 1.6557900733417934
    num_agent_steps_sampled: 1631000
    num_agent_steps_trained: 1631000
    num_steps_sampled: 1631000
    num_steps_trained: 1631

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1631,41644.8,1631000,-27.854,-21.4,-57.9,278.54


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1632000
  custom_metrics: {}
  date: 2021-10-22_07-20-28
  done: false
  episode_len_mean: 279.87
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -27.98700000000013
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5026
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424823554894912e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.5004050254821777
          entropy_coeff: 0.009999999999999998
          kl: 0.007349543478804163
          policy_loss: -0.0006609448128276402
          total_loss: 1.470791725317637
          vf_explained_var: 0.22287188470363617
          vf_loss: 1.4764567335446677
    num_agent_steps_sampled: 1632000
    num_agent_steps_trained: 1632000
    num_steps_sampled: 1632000
    num_steps_trained: 1632

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1632,41672.8,1632000,-27.987,-21.4,-57.9,279.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1633000
  custom_metrics: {}
  date: 2021-10-22_07-20-56
  done: false
  episode_len_mean: 280.68
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.068000000000126
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5030
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424823554894912e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.5697091499964396
          entropy_coeff: 0.009999999999999998
          kl: 0.006496070982142833
          policy_loss: 0.016830835905339982
          total_loss: 1.4997760600513883
          vf_explained_var: 0.2062220275402069
          vf_loss: 1.4886423071225485
    num_agent_steps_sampled: 1633000
    num_agent_steps_trained: 1633000
    num_steps_sampled: 1633000
    num_steps_trained: 16330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1633,41700.6,1633000,-28.068,-21.4,-57.9,280.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1634000
  custom_metrics: {}
  date: 2021-10-22_07-21-23
  done: false
  episode_len_mean: 280.95
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.095000000000127
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5034
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424823554894912e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.7777427388562097
          entropy_coeff: 0.009999999999999998
          kl: 0.08067744465701104
          policy_loss: 0.02207314446568489
          total_loss: 1.2503469579749638
          vf_explained_var: 0.27267152070999146
          vf_loss: 1.2360512528154586
    num_agent_steps_sampled: 1634000
    num_agent_steps_trained: 1634000
    num_steps_sampled: 1634000
    num_steps_trained: 163400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1634,41727.7,1634000,-28.095,-21.4,-57.9,280.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1635000
  custom_metrics: {}
  date: 2021-10-22_07-21-52
  done: false
  episode_len_mean: 280.5
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.05000000000012
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5038
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6372353323423687e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.7820734666453467
          entropy_coeff: 0.009999999999999998
          kl: 0.005199075050840444
          policy_loss: -0.016103530012898975
          total_loss: 1.2872277326054042
          vf_explained_var: 0.095302514731884
          vf_loss: 1.3111519985728795
    num_agent_steps_sampled: 1635000
    num_agent_steps_trained: 1635000
    num_steps_sampled: 1635000
    num_steps_trained: 163500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1635,41756.6,1635000,-28.05,-21.4,-57.9,280.5




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1636000
  custom_metrics: {}
  date: 2021-10-22_07-22-36
  done: false
  episode_len_mean: 281.22
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.122000000000124
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5041
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6372353323423687e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.815922408633762
          entropy_coeff: 0.009999999999999998
          kl: 0.06019465905204381
          policy_loss: 0.027178077234162226
          total_loss: 1.0264437201950285
          vf_explained_var: 0.18849168717861176
          vf_loss: 1.0074248588747448
    num_agent_steps_sampled: 1636000
    num_agent_steps_trained: 1636000
    num_steps_sampled: 1636000
    num_steps_trained: 16360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1636,41799.9,1636000,-28.122,-21.4,-57.9,281.22


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1637000
  custom_metrics: {}
  date: 2021-10-22_07-22-59
  done: false
  episode_len_mean: 283.12
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.312000000000126
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5044
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.455852998513554e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.7799850351280636
          entropy_coeff: 0.009999999999999998
          kl: 0.02285682985711301
          policy_loss: -0.09109456853734123
          total_loss: 1.4418609592649672
          vf_explained_var: 0.0892663449048996
          vf_loss: 1.5407553884718153
    num_agent_steps_sampled: 1637000
    num_agent_steps_trained: 1637000
    num_steps_sampled: 1637000
    num_steps_trained: 163700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1637,41823,1637000,-28.312,-21.4,-57.9,283.12


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1638000
  custom_metrics: {}
  date: 2021-10-22_07-23-27
  done: false
  episode_len_mean: 284.17
  episode_media: {}
  episode_reward_max: -21.400000000000034
  episode_reward_mean: -28.41700000000013
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5048
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.18377949777033e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.762423653072781
          entropy_coeff: 0.009999999999999998
          kl: 0.021931062341830437
          policy_loss: 0.041248706604043645
          total_loss: 1.1539399524529774
          vf_explained_var: 0.21764107048511505
          vf_loss: 1.1203154868549772
    num_agent_steps_sampled: 1638000
    num_agent_steps_trained: 1638000
    num_steps_sampled: 1638000
    num_steps_trained: 1638000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1638,41851.3,1638000,-28.417,-21.4,-57.9,284.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1639000
  custom_metrics: {}
  date: 2021-10-22_07-23-55
  done: false
  episode_len_mean: 285.02
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.50200000000013
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5052
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2275669246655498e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.6774388055006663
          entropy_coeff: 0.009999999999999998
          kl: 0.02716736148957308
          policy_loss: 0.00039991351465384163
          total_loss: 1.1894981510109373
          vf_explained_var: 0.4030153453350067
          vf_loss: 1.195872645245658
    num_agent_steps_sampled: 1639000
    num_agent_steps_trained: 1639000
    num_steps_sampled: 1639000
    num_steps_trained: 16390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1639,41879.5,1639000,-28.502,-22.1,-57.9,285.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1640000
  custom_metrics: {}
  date: 2021-10-22_07-24-21
  done: false
  episode_len_mean: 285.19
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.51900000000013
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5056
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8413503869983235e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.732695789469613
          entropy_coeff: 0.009999999999999998
          kl: 0.0426591977471582
          policy_loss: -0.028457153091828027
          total_loss: 1.477738008234236
          vf_explained_var: 0.20987717807292938
          vf_loss: 1.5135221150186327
    num_agent_steps_sampled: 1640000
    num_agent_steps_trained: 1640000
    num_steps_sampled: 1640000
    num_steps_trained: 1640000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1640,41905.6,1640000,-28.519,-22.1,-57.9,285.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1641000
  custom_metrics: {}
  date: 2021-10-22_07-24-48
  done: false
  episode_len_mean: 285.5
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.550000000000132
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5059
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.762025580497486e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.9710034184985691
          entropy_coeff: 0.009999999999999998
          kl: 0.023270791455070456
          policy_loss: 0.03314262040787273
          total_loss: 1.045460875166787
          vf_explained_var: 0.335744172334671
          vf_loss: 1.0220282899008857
    num_agent_steps_sampled: 1641000
    num_agent_steps_trained: 1641000
    num_steps_sampled: 1641000
    num_steps_trained: 1641000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1641,41932.5,1641000,-28.55,-22.1,-57.9,285.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1642000
  custom_metrics: {}
  date: 2021-10-22_07-25-17
  done: false
  episode_len_mean: 285.89
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.589000000000137
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5063
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.14303837074623e-58
          cur_lr: 5.000000000000001e-05
          entropy: 1.4214090837372675
          entropy_coeff: 0.009999999999999998
          kl: 0.060345960038430096
          policy_loss: 0.005496942458881272
          total_loss: 1.7918472833103605
          vf_explained_var: 0.17328014969825745
          vf_loss: 1.800564420223236
    num_agent_steps_sampled: 1642000
    num_agent_steps_trained: 1642000
    num_steps_sampled: 1642000
    num_steps_trained: 164200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1642,41961.2,1642000,-28.589,-22.1,-57.9,285.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1643000
  custom_metrics: {}
  date: 2021-10-22_07-25-41
  done: false
  episode_len_mean: 287.29
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.729000000000138
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5066
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214557556119344e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.9903478066126505
          entropy_coeff: 0.009999999999999998
          kl: 0.042302805342841844
          policy_loss: -0.0991277541551325
          total_loss: 1.1578478866153292
          vf_explained_var: 0.42290470004081726
          vf_loss: 1.2668791201379563
    num_agent_steps_sampled: 1643000
    num_agent_steps_trained: 1643000
    num_steps_sampled: 1643000
    num_steps_trained: 16430

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1643,41984.9,1643000,-28.729,-22.1,-57.9,287.29




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1644000
  custom_metrics: {}
  date: 2021-10-22_07-26-22
  done: false
  episode_len_mean: 288.94
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.894000000000137
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5070
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.321836334179017e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.852833092212677
          entropy_coeff: 0.009999999999999998
          kl: 0.020852838521745475
          policy_loss: 0.005978140897221035
          total_loss: 1.0936019314659966
          vf_explained_var: 0.5539462566375732
          vf_loss: 1.0961521281136406
    num_agent_steps_sampled: 1644000
    num_agent_steps_trained: 1644000
    num_steps_sampled: 1644000
    num_steps_trained: 164400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1644,42026.3,1644000,-28.894,-22.1,-57.9,288.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1645000
  custom_metrics: {}
  date: 2021-10-22_07-26-52
  done: false
  episode_len_mean: 290.02
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -29.002000000000145
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 3
  episodes_total: 5073
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3982754501268525e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8125999708970387
          entropy_coeff: 0.009999999999999998
          kl: 0.015144819775550013
          policy_loss: -0.0874386328789923
          total_loss: 1.1219517052173615
          vf_explained_var: 0.42073774337768555
          vf_loss: 1.2175163288911184
    num_agent_steps_sampled: 1645000
    num_agent_steps_trained: 1645000
    num_steps_sampled: 1645000
    num_steps_trained: 1645

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1645,42055.8,1645000,-29.002,-22.1,-57.9,290.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1646000
  custom_metrics: {}
  date: 2021-10-22_07-27-18
  done: false
  episode_len_mean: 290.71
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -29.071000000000144
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5077
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3982754501268525e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8833857198556264
          entropy_coeff: 0.009999999999999998
          kl: 0.004233134339487618
          policy_loss: 0.017322137869066664
          total_loss: 1.309728741645813
          vf_explained_var: 0.3809940218925476
          vf_loss: 1.3012404693497552
    num_agent_steps_sampled: 1646000
    num_agent_steps_trained: 1646000
    num_steps_sampled: 1646000
    num_steps_trained: 16460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1646,42082.4,1646000,-29.071,-22.1,-57.9,290.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1647000
  custom_metrics: {}
  date: 2021-10-22_07-27-47
  done: false
  episode_len_mean: 291.5
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -29.15000000000014
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5081
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.991377250634263e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.833285172118081
          entropy_coeff: 0.009999999999999998
          kl: 0.00492668274769825
          policy_loss: 0.032488427228397794
          total_loss: 1.29317018588384
          vf_explained_var: 0.37271106243133545
          vf_loss: 1.2690146101845636
    num_agent_steps_sampled: 1647000
    num_agent_steps_trained: 1647000
    num_steps_sampled: 1647000
    num_steps_trained: 1647000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1647,42110.6,1647000,-29.15,-24.3,-57.9,291.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1648000
  custom_metrics: {}
  date: 2021-10-22_07-28-15
  done: false
  episode_len_mean: 291.72
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -29.172000000000143
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5085
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.495688625317131e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.7840636736816831
          entropy_coeff: 0.009999999999999998
          kl: 0.004980241545485311
          policy_loss: 0.03970153662893507
          total_loss: 1.2217463824484083
          vf_explained_var: 0.41653311252593994
          vf_loss: 1.1898854752381642
    num_agent_steps_sampled: 1648000
    num_agent_steps_trained: 1648000
    num_steps_sampled: 1648000
    num_steps_trained: 16480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1648,42138.9,1648000,-29.172,-24.3,-57.9,291.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1649000
  custom_metrics: {}
  date: 2021-10-22_07-28-43
  done: false
  episode_len_mean: 292.29
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -29.229000000000145
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5089
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7478443126585656e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8324558105733659
          entropy_coeff: 0.009999999999999998
          kl: 0.012341058902369974
          policy_loss: -0.006012158013052411
          total_loss: 1.1198830915821922
          vf_explained_var: 0.4505635201931
          vf_loss: 1.1342198073863983
    num_agent_steps_sampled: 1649000
    num_agent_steps_trained: 1649000
    num_steps_sampled: 1649000
    num_steps_trained: 164900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1649,42167.3,1649000,-29.229,-24.3,-57.9,292.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1650000
  custom_metrics: {}
  date: 2021-10-22_07-29-12
  done: false
  episode_len_mean: 292.86
  episode_media: {}
  episode_reward_max: -24.400000000000077
  episode_reward_mean: -29.28600000000015
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5093
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7478443126585656e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8204862607849969
          entropy_coeff: 0.009999999999999998
          kl: 0.05430334176524122
          policy_loss: 0.022116399390829933
          total_loss: 0.8197818875312806
          vf_explained_var: 0.5744484663009644
          vf_loss: 0.8058703468905555
    num_agent_steps_sampled: 1650000
    num_agent_steps_trained: 1650000
    num_steps_sampled: 1650000
    num_steps_trained: 165000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1650,42196.3,1650000,-29.286,-24.4,-57.9,292.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1651000
  custom_metrics: {}
  date: 2021-10-22_07-29-41
  done: false
  episode_len_mean: 293.11
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -29.311000000000153
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5097
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.621766468987849e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.48258153597513836
          entropy_coeff: 0.009999999999999998
          kl: 0.005615474800406951
          policy_loss: -0.026471149052182834
          total_loss: 0.8606680446200901
          vf_explained_var: 0.613577127456665
          vf_loss: 0.8919650144047208
    num_agent_steps_sampled: 1651000
    num_agent_steps_trained: 1651000
    num_steps_sampled: 1651000
    num_steps_trained: 16510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1651,42225.1,1651000,-29.311,-24.7,-57.9,293.11




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1652000
  custom_metrics: {}
  date: 2021-10-22_07-30-27
  done: false
  episode_len_mean: 293.07
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -29.30700000000015
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5101
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.621766468987849e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.3802929782205158
          entropy_coeff: 0.009999999999999998
          kl: 0.031504330962071475
          policy_loss: -0.016260888593064415
          total_loss: 1.1594523304038578
          vf_explained_var: 0.38542529940605164
          vf_loss: 1.1795161392953661
    num_agent_steps_sampled: 1652000
    num_agent_steps_trained: 1652000
    num_steps_sampled: 1652000
    num_steps_trained: 16520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1652,42271.1,1652000,-29.307,-21.9,-57.9,293.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1653000
  custom_metrics: {}
  date: 2021-10-22_07-30-57
  done: false
  episode_len_mean: 284.68
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.46800000000014
  episode_reward_min: -57.90000000000055
  episodes_this_iter: 4
  episodes_total: 5105
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.932649703481771e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.5589410169257059
          entropy_coeff: 0.009999999999999998
          kl: 0.046319509822044656
          policy_loss: -0.03371033080750042
          total_loss: 0.9881939523749881
          vf_explained_var: 0.3798915445804596
          vf_loss: 1.0274937000539568
    num_agent_steps_sampled: 1653000
    num_agent_steps_trained: 1653000
    num_steps_sampled: 1653000
    num_steps_trained: 1653000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1653,42300.5,1653000,-28.468,-21.9,-57.9,284.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1654000
  custom_metrics: {}
  date: 2021-10-22_07-31-26
  done: false
  episode_len_mean: 275.89
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.589000000000123
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 4
  episodes_total: 5109
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.898974555222658e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.31365234007438025
          entropy_coeff: 0.009999999999999998
          kl: 0.013163698207605042
          policy_loss: -0.0062037948105070325
          total_loss: 0.8778117424911923
          vf_explained_var: 0.4737957715988159
          vf_loss: 0.8871520552370283
    num_agent_steps_sampled: 1654000
    num_agent_steps_trained: 1654000
    num_steps_sampled: 1654000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1654,42330.3,1654000,-27.589,-21.9,-42.6,275.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1655000
  custom_metrics: {}
  date: 2021-10-22_07-31-56
  done: false
  episode_len_mean: 270.46
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.04600000000011
  episode_reward_min: -39.20000000000029
  episodes_this_iter: 4
  episodes_total: 5113
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.898974555222658e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.2961614027619362
          entropy_coeff: 0.009999999999999998
          kl: 0.00470188037121141
          policy_loss: 0.04649790790345934
          total_loss: 0.9159939666589101
          vf_explained_var: 0.35739022493362427
          vf_loss: 0.8724576758013831
    num_agent_steps_sampled: 1655000
    num_agent_steps_trained: 1655000
    num_steps_sampled: 1655000
    num_steps_trained: 1655000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1655,42359.6,1655000,-27.046,-21.9,-39.2,270.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1656000
  custom_metrics: {}
  date: 2021-10-22_07-32-26
  done: false
  episode_len_mean: 266.35
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.635000000000108
  episode_reward_min: -34.40000000000022
  episodes_this_iter: 4
  episodes_total: 5117
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.949487277611329e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.3331959909862942
          entropy_coeff: 0.009999999999999998
          kl: 0.0054755871059129725
          policy_loss: 0.031566737509436074
          total_loss: 1.0898192511664497
          vf_explained_var: 0.2610691785812378
          vf_loss: 1.0615844580862257
    num_agent_steps_sampled: 1656000
    num_agent_steps_trained: 1656000
    num_steps_sampled: 1656000
    num_steps_trained: 16560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1656,42389.6,1656000,-26.635,-21.9,-34.4,266.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1657000
  custom_metrics: {}
  date: 2021-10-22_07-32-55
  done: false
  episode_len_mean: 264.24
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.424000000000103
  episode_reward_min: -33.3000000000002
  episodes_this_iter: 4
  episodes_total: 5121
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.949487277611329e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.34308069083425735
          entropy_coeff: 0.009999999999999998
          kl: 0.012850055318665296
          policy_loss: 0.051489225195513834
          total_loss: 0.9758717503812578
          vf_explained_var: 0.31654465198516846
          vf_loss: 0.9278133160538143
    num_agent_steps_sampled: 1657000
    num_agent_steps_trained: 1657000
    num_steps_sampled: 1657000
    num_steps_trained: 16570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1657,42419.3,1657000,-26.424,-21.9,-33.3,264.24


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1658000
  custom_metrics: {}
  date: 2021-10-22_07-33-25
  done: false
  episode_len_mean: 263.08
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.308000000000103
  episode_reward_min: -33.3000000000002
  episodes_this_iter: 4
  episodes_total: 5125
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.949487277611329e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.338374100625515
          entropy_coeff: 0.009999999999999998
          kl: 0.05014439699731004
          policy_loss: 0.020218838585747613
          total_loss: 1.0595864375432333
          vf_explained_var: 0.2826513350009918
          vf_loss: 1.0427513533168369
    num_agent_steps_sampled: 1658000
    num_agent_steps_trained: 1658000
    num_steps_sampled: 1658000
    num_steps_trained: 1658000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1658,42449.1,1658000,-26.308,-21.9,-33.3,263.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1659000
  custom_metrics: {}
  date: 2021-10-22_07-33-50
  done: false
  episode_len_mean: 264.31
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.4310000000001
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 3
  episodes_total: 5128
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.424230916416994e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.25860894338952173
          entropy_coeff: 0.009999999999999998
          kl: 0.03302359248953905
          policy_loss: 0.009138275351789262
          total_loss: 1.0177073922422197
          vf_explained_var: 0.07365991920232773
          vf_loss: 1.0111552049716315
    num_agent_steps_sampled: 1659000
    num_agent_steps_trained: 1659000
    num_steps_sampled: 1659000
    num_steps_trained: 1659000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1659,42473.9,1659000,-26.431,-21.9,-36.1,264.31




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1660000
  custom_metrics: {}
  date: 2021-10-22_07-34-36
  done: false
  episode_len_mean: 263.86
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.386000000000102
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5132
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.63634637462549e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.17493149687846501
          entropy_coeff: 0.009999999999999998
          kl: 0.019000525702799424
          policy_loss: -0.026915027615096836
          total_loss: 0.9746320558918847
          vf_explained_var: 0.3478604853153229
          vf_loss: 1.0032963865333133
    num_agent_steps_sampled: 1660000
    num_agent_steps_trained: 1660000
    num_steps_sampled: 1660000
    num_steps_trained: 16600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1660,42519.9,1660000,-26.386,-21.9,-36.1,263.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1661000
  custom_metrics: {}
  date: 2021-10-22_07-35-07
  done: false
  episode_len_mean: 262.87
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.287000000000095
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5136
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.63634637462549e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.23204025708966786
          entropy_coeff: 0.009999999999999998
          kl: 0.04321728662396992
          policy_loss: -0.08286845179067717
          total_loss: 1.1470814406871797
          vf_explained_var: 0.31672006845474243
          vf_loss: 1.2322702990637886
    num_agent_steps_sampled: 1661000
    num_agent_steps_trained: 1661000
    num_steps_sampled: 1661000
    num_steps_trained: 166100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1661,42550.4,1661000,-26.287,-21.9,-36.1,262.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1662000
  custom_metrics: {}
  date: 2021-10-22_07-35-36
  done: false
  episode_len_mean: 261.76
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.176000000000105
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5141
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.954519561938234e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.3010551994873418
          entropy_coeff: 0.009999999999999998
          kl: 0.18645189213335928
          policy_loss: -0.020017859463890394
          total_loss: 1.3005873892042372
          vf_explained_var: 0.3857647776603699
          vf_loss: 1.323615809281667
    num_agent_steps_sampled: 1662000
    num_agent_steps_trained: 1662000
    num_steps_sampled: 1662000
    num_steps_trained: 1662000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1662,42579.6,1662000,-26.176,-21.9,-36.1,261.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1663000
  custom_metrics: {}
  date: 2021-10-22_07-36-06
  done: false
  episode_len_mean: 259.04
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.904000000000096
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5145
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.493177934290735e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.2581752131382624
          entropy_coeff: 0.009999999999999998
          kl: 0.0474654139604594
          policy_loss: -0.024172984229193794
          total_loss: 0.8932672202587127
          vf_explained_var: 0.5113707184791565
          vf_loss: 0.9200219498740302
    num_agent_steps_sampled: 1663000
    num_agent_steps_trained: 1663000
    num_steps_sampled: 1663000
    num_steps_trained: 1663000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1663,42609.3,1663000,-25.904,-21.9,-36.1,259.04


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1664000
  custom_metrics: {}
  date: 2021-10-22_07-36-35
  done: false
  episode_len_mean: 258.09
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.8090000000001
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5149
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2397669014361025e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.09949276389347182
          entropy_coeff: 0.009999999999999998
          kl: 0.0017124440211511505
          policy_loss: 0.008013589142097367
          total_loss: 0.9847174723943074
          vf_explained_var: 0.38501065969467163
          vf_loss: 0.977698810895284
    num_agent_steps_sampled: 1664000
    num_agent_steps_trained: 1664000
    num_steps_sampled: 1664000
    num_steps_trained: 16640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1664,42638.6,1664000,-25.809,-21.9,-36.1,258.09


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1665000
  custom_metrics: {}
  date: 2021-10-22_07-37-06
  done: false
  episode_len_mean: 257.32
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.732000000000102
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5153
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1198834507180512e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.12511500095327696
          entropy_coeff: 0.009999999999999998
          kl: 0.0016904078326262667
          policy_loss: 0.031002545770671634
          total_loss: 1.1263400581147935
          vf_explained_var: 0.28370147943496704
          vf_loss: 1.096588675181071
    num_agent_steps_sampled: 1665000
    num_agent_steps_trained: 1665000
    num_steps_sampled: 1665000
    num_steps_trained: 166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1665,42669.5,1665000,-25.732,-21.9,-36.1,257.32


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1666000
  custom_metrics: {}
  date: 2021-10-22_07-37-36
  done: false
  episode_len_mean: 256.19
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -25.619000000000096
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5157
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.599417253590256e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.12092493524154027
          entropy_coeff: 0.009999999999999998
          kl: 0.00418093731935569
          policy_loss: 0.03336643460724089
          total_loss: 1.0644249790244633
          vf_explained_var: 0.35024330019950867
          vf_loss: 1.03226780295372
    num_agent_steps_sampled: 1666000
    num_agent_steps_trained: 1666000
    num_steps_sampled: 1666000
    num_steps_trained: 1666000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1666,42699.2,1666000,-25.619,-21.9,-36.1,256.19




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1667000
  custom_metrics: {}
  date: 2021-10-22_07-38-25
  done: false
  episode_len_mean: 254.84
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -25.484000000000098
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5161
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.799708626795128e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.2898924814330207
          entropy_coeff: 0.009999999999999998
          kl: 0.1634558410231478
          policy_loss: -0.027911822663413154
          total_loss: 0.9618683324919807
          vf_explained_var: 0.4077688753604889
          vf_loss: 0.9926790826850467
    num_agent_steps_sampled: 1667000
    num_agent_steps_trained: 1667000
    num_steps_sampled: 1667000
    num_steps_trained: 1667000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1667,42748.3,1667000,-25.484,-21.2,-36.1,254.84


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1668000
  custom_metrics: {}
  date: 2021-10-22_07-38-54
  done: false
  episode_len_mean: 253.25
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -25.32500000000009
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5165
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.199562940192692e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.13498219955298635
          entropy_coeff: 0.009999999999999998
          kl: 0.032908901416724125
          policy_loss: 0.0046246619688140025
          total_loss: 1.1478027522563934
          vf_explained_var: 0.2641581892967224
          vf_loss: 1.1445279094907972
    num_agent_steps_sampled: 1668000
    num_agent_steps_trained: 1668000
    num_steps_sampled: 1668000
    num_steps_trained: 16680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1668,42777.5,1668000,-25.325,-21.2,-36.1,253.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1669000
  custom_metrics: {}
  date: 2021-10-22_07-39-25
  done: false
  episode_len_mean: 250.63
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -25.063000000000088
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5170
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.2993444102890406e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.23125514371527567
          entropy_coeff: 0.009999999999999998
          kl: 0.022239781322589476
          policy_loss: -0.0074282796846495735
          total_loss: 1.3169473045402103
          vf_explained_var: 0.4005896747112274
          vf_loss: 1.3266881273852453
    num_agent_steps_sampled: 1669000
    num_agent_steps_trained: 1669000
    num_steps_sampled: 1669000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1669,42808.4,1669000,-25.063,-21.2,-36.1,250.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1670000
  custom_metrics: {}
  date: 2021-10-22_07-39-54
  done: false
  episode_len_mean: 249.29
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -24.929000000000087
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5174
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.44901661543356e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.1998208506239785
          entropy_coeff: 0.009999999999999998
          kl: 0.012356553584869643
          policy_loss: 0.03098616600036621
          total_loss: 0.8562906914287143
          vf_explained_var: 0.5127916932106018
          vf_loss: 0.8273027367062039
    num_agent_steps_sampled: 1670000
    num_agent_steps_trained: 1670000
    num_steps_sampled: 1670000
    num_steps_trained: 1670000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1670,42837.3,1670000,-24.929,-21.2,-36.1,249.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1671000
  custom_metrics: {}
  date: 2021-10-22_07-40-27
  done: false
  episode_len_mean: 247.45
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -24.745000000000083
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5178
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.44901661543356e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.2039554898109701
          entropy_coeff: 0.009999999999999998
          kl: 0.03161536165037325
          policy_loss: 0.03709432946311103
          total_loss: 0.9726405885484484
          vf_explained_var: 0.2205633521080017
          vf_loss: 0.9375858147939046
    num_agent_steps_sampled: 1671000
    num_agent_steps_trained: 1671000
    num_steps_sampled: 1671000
    num_steps_trained: 1671000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1671,42870.5,1671000,-24.745,-21.2,-36.1,247.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1672000
  custom_metrics: {}
  date: 2021-10-22_07-40-57
  done: false
  episode_len_mean: 246.33
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -24.633000000000084
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5182
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4173524923150338e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.09882717629273732
          entropy_coeff: 0.009999999999999998
          kl: 0.0018473616338851053
          policy_loss: -0.07828923683199618
          total_loss: 1.2153006805313957
          vf_explained_var: 0.16111408174037933
          vf_loss: 1.2945782038900586
    num_agent_steps_sampled: 1672000
    num_agent_steps_trained: 1672000
    num_steps_sampled: 1672000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1672,42900.9,1672000,-24.633,-21.2,-36.1,246.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1673000
  custom_metrics: {}
  date: 2021-10-22_07-41-28
  done: false
  episode_len_mean: 245.33
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -24.53300000000008
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5187
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.086762461575169e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.10798458034793536
          entropy_coeff: 0.009999999999999998
          kl: 0.011785267651205484
          policy_loss: 0.004314904784162839
          total_loss: 1.1984859923521678
          vf_explained_var: 0.3446927070617676
          vf_loss: 1.195250917143292
    num_agent_steps_sampled: 1673000
    num_agent_steps_trained: 1673000
    num_steps_sampled: 1673000
    num_steps_trained: 1673000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1673,42931.7,1673000,-24.533,-21.2,-36.1,245.33




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1674000
  custom_metrics: {}
  date: 2021-10-22_07-42-18
  done: false
  episode_len_mean: 243.61
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.361000000000075
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5191
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.086762461575169e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.13051815285450882
          entropy_coeff: 0.009999999999999998
          kl: 0.00943893337094508
          policy_loss: -0.006696525754200088
          total_loss: 1.2892720328436957
          vf_explained_var: 0.08115532994270325
          vf_loss: 1.2972737272580466
    num_agent_steps_sampled: 1674000
    num_agent_steps_trained: 1674000
    num_steps_sampled: 1674000
    num_steps_trained: 167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1674,42981.9,1674000,-24.361,-18.7,-36.1,243.61


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1675000
  custom_metrics: {}
  date: 2021-10-22_07-42-51
  done: false
  episode_len_mean: 242.17
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.21700000000007
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5196
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.086762461575169e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.12665672049754195
          entropy_coeff: 0.009999999999999998
          kl: 0.0038636736911296567
          policy_loss: -0.01432826394836108
          total_loss: 1.5872729725307888
          vf_explained_var: 0.19553571939468384
          vf_loss: 1.602867790725496
    num_agent_steps_sampled: 1675000
    num_agent_steps_trained: 1675000
    num_steps_sampled: 1675000
    num_steps_trained: 1675

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1675,43014.1,1675000,-24.217,-18.7,-36.1,242.17


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1676000
  custom_metrics: {}
  date: 2021-10-22_07-43-25
  done: false
  episode_len_mean: 240.83
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.08300000000007
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5200
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5433812307875845e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0734483990404341
          entropy_coeff: 0.009999999999999998
          kl: 0.0005048633700832935
          policy_loss: 0.0167932514515188
          total_loss: 1.327204438050588
          vf_explained_var: 0.0170254185795784
          vf_loss: 1.3111456526650322
    num_agent_steps_sampled: 1676000
    num_agent_steps_trained: 1676000
    num_steps_sampled: 1676000
    num_steps_trained: 1676000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1676,43048.1,1676000,-24.083,-18.7,-36.1,240.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1677000
  custom_metrics: {}
  date: 2021-10-22_07-43-57
  done: false
  episode_len_mean: 239.27
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.927000000000067
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5205
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7716906153937922e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0715405061841011
          entropy_coeff: 0.009999999999999998
          kl: 0.0003880154435065582
          policy_loss: -0.011271924359930886
          total_loss: 1.810962200164795
          vf_explained_var: 0.03365614637732506
          vf_loss: 1.8229495114750331
    num_agent_steps_sampled: 1677000
    num_agent_steps_trained: 1677000
    num_steps_sampled: 1677000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1677,43080.6,1677000,-23.927,-18.7,-36.1,239.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1678000
  custom_metrics: {}
  date: 2021-10-22_07-44-31
  done: false
  episode_len_mean: 237.81
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.781000000000063
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 5
  episodes_total: 5210
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.858453076968961e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.07234722690449821
          entropy_coeff: 0.009999999999999998
          kl: 0.0002220761252015165
          policy_loss: 0.011805760943227344
          total_loss: 1.5911836862564086
          vf_explained_var: 0.03392985090613365
          vf_loss: 1.5801014012760586
    num_agent_steps_sampled: 1678000
    num_agent_steps_trained: 1678000
    num_steps_sampled: 1678000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1678,43114,1678000,-23.781,-18.7,-36.1,237.81


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1679000
  custom_metrics: {}
  date: 2021-10-22_07-45-04
  done: false
  episode_len_mean: 236.69
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.669000000000064
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5214
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.429226538484481e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.14974473615487416
          entropy_coeff: 0.009999999999999998
          kl: 0.009911473678377128
          policy_loss: 0.05589889950222439
          total_loss: 1.1095053672790527
          vf_explained_var: 0.01959378644824028
          vf_loss: 1.0551039331489138
    num_agent_steps_sampled: 1679000
    num_agent_steps_trained: 1679000
    num_steps_sampled: 1679000
    num_steps_trained: 1679

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1679,43147.6,1679000,-23.669,-18.7,-36.1,236.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1680000
  custom_metrics: {}
  date: 2021-10-22_07-45-33
  done: false
  episode_len_mean: 236.68
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.668000000000067
  episode_reward_min: -36.10000000000024
  episodes_this_iter: 4
  episodes_total: 5218
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.429226538484481e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.27523558487494787
          entropy_coeff: 0.009999999999999998
          kl: 0.07336322371983933
          policy_loss: -0.0043367805994219254
          total_loss: 1.172847400771247
          vf_explained_var: 0.21105654537677765
          vf_loss: 1.17993653482861
    num_agent_steps_sampled: 1680000
    num_agent_steps_trained: 1680000
    num_steps_sampled: 1680000
    num_steps_trained: 16800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1680,43176.3,1680000,-23.668,-18.7,-36.1,236.68




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1681000
  custom_metrics: {}
  date: 2021-10-22_07-46-10
  done: false
  episode_len_mean: 240.37
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.037000000000077
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5221
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6438398077267245e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.42961737397644256
          entropy_coeff: 0.009999999999999998
          kl: 0.04619512026309154
          policy_loss: 0.05100260418322351
          total_loss: 0.6177434901396434
          vf_explained_var: 0.5415109395980835
          vf_loss: 0.5710370668158349
    num_agent_steps_sampled: 1681000
    num_agent_steps_trained: 1681000
    num_steps_sampled: 1681000
    num_steps_trained: 16810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1681,43213.6,1681000,-24.037,-18.7,-50.5,240.37


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1682000
  custom_metrics: {}
  date: 2021-10-22_07-46-31
  done: false
  episode_len_mean: 242.2
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.220000000000077
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 2
  episodes_total: 5223
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.965759711590084e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.8231989820798238
          entropy_coeff: 0.009999999999999998
          kl: 0.0747546661961716
          policy_loss: -0.021278153856595358
          total_loss: 0.5839789152145386
          vf_explained_var: 0.46521446108818054
          vf_loss: 0.6134890696137316
    num_agent_steps_sampled: 1682000
    num_agent_steps_trained: 1682000
    num_steps_sampled: 1682000
    num_steps_trained: 168200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1682,43234.2,1682000,-24.22,-18.7,-50.5,242.2


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1683000
  custom_metrics: {}
  date: 2021-10-22_07-46-55
  done: false
  episode_len_mean: 245.8
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.580000000000076
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5226
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4948639567385121e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8965896281931135
          entropy_coeff: 0.009999999999999998
          kl: 0.037181966680849936
          policy_loss: -0.0972145145965947
          total_loss: 1.1359574817948872
          vf_explained_var: 0.4001346230506897
          vf_loss: 1.2421379016505347
    num_agent_steps_sampled: 1683000
    num_agent_steps_trained: 1683000
    num_steps_sampled: 1683000
    num_steps_trained: 168300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1683,43258.1,1683000,-24.58,-18.7,-50.5,245.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1684000
  custom_metrics: {}
  date: 2021-10-22_07-47-16
  done: false
  episode_len_mean: 246.88
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.68800000000008
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5229
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2422959351077695e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8357431266042922
          entropy_coeff: 0.009999999999999998
          kl: 0.023236259802306196
          policy_loss: -0.06658202815386983
          total_loss: 1.0261412335766686
          vf_explained_var: 0.5761343836784363
          vf_loss: 1.1010807004239824
    num_agent_steps_sampled: 1684000
    num_agent_steps_trained: 1684000
    num_steps_sampled: 1684000
    num_steps_trained: 16840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1684,43279,1684000,-24.688,-18.7,-50.5,246.88


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1685000
  custom_metrics: {}
  date: 2021-10-22_07-47-36
  done: false
  episode_len_mean: 251.45
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.145000000000085
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5232
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3634439026616535e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8506514132022858
          entropy_coeff: 0.009999999999999998
          kl: 0.03645843407039781
          policy_loss: 0.05345668709940381
          total_loss: 0.43648595329788
          vf_explained_var: 0.8499116897583008
          vf_loss: 0.39153577973031334
    num_agent_steps_sampled: 1685000
    num_agent_steps_trained: 1685000
    num_steps_sampled: 1685000
    num_steps_trained: 1685000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1685,43298.9,1685000,-25.145,-18.7,-50.5,251.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1686000
  custom_metrics: {}
  date: 2021-10-22_07-48-02
  done: false
  episode_len_mean: 252.71
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.27100000000009
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5235
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0451658539924784e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8154326518376668
          entropy_coeff: 0.009999999999999998
          kl: 0.04066347507557898
          policy_loss: 0.039359595088495146
          total_loss: 0.5086308696203762
          vf_explained_var: 0.7804316282272339
          vf_loss: 0.47742559876706864
    num_agent_steps_sampled: 1686000
    num_agent_steps_trained: 1686000
    num_steps_sampled: 1686000
    num_steps_trained: 16860

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1686,43325,1686000,-25.271,-18.7,-50.5,252.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1687000
  custom_metrics: {}
  date: 2021-10-22_07-48-27
  done: false
  episode_len_mean: 254.55
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.45500000000009
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5238
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.567748780988721e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.704103954633077
          entropy_coeff: 0.009999999999999998
          kl: 0.011219028704166098
          policy_loss: -0.07344604308406512
          total_loss: 0.5935265805986193
          vf_explained_var: 0.8176064491271973
          vf_loss: 0.6740136649873522
    num_agent_steps_sampled: 1687000
    num_agent_steps_trained: 1687000
    num_steps_sampled: 1687000
    num_steps_trained: 1687000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1687,43349.8,1687000,-25.455,-18.7,-50.5,254.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1688000
  custom_metrics: {}
  date: 2021-10-22_07-48-52
  done: false
  episode_len_mean: 256.33
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.633000000000088
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5242
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.567748780988721e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.7031629059049819
          entropy_coeff: 0.009999999999999998
          kl: 0.012562594927164621
          policy_loss: 0.033466434023446505
          total_loss: 0.6654275602764553
          vf_explained_var: 0.7866249084472656
          vf_loss: 0.6389927496512731
    num_agent_steps_sampled: 1688000
    num_agent_steps_trained: 1688000
    num_steps_sampled: 1688000
    num_steps_trained: 16880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1688,43375.3,1688000,-25.633,-18.7,-50.5,256.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1689000
  custom_metrics: {}
  date: 2021-10-22_07-49-20
  done: false
  episode_len_mean: 257.72
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.772000000000094
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5246
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.567748780988721e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.6802012019687229
          entropy_coeff: 0.009999999999999998
          kl: 0.02768283062590342
          policy_loss: -0.0017556351092126634
          total_loss: 0.934136610560947
          vf_explained_var: 0.6380051374435425
          vf_loss: 0.9426942626635234
    num_agent_steps_sampled: 1689000
    num_agent_steps_trained: 1689000
    num_steps_sampled: 1689000
    num_steps_trained: 16890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1689,43403.1,1689000,-25.772,-18.7,-50.5,257.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1690000
  custom_metrics: {}
  date: 2021-10-22_07-49-47
  done: false
  episode_len_mean: 258.23
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.823000000000093
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5249
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1351623171483078e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6481787797477511
          entropy_coeff: 0.009999999999999998
          kl: 0.016484125374638676
          policy_loss: -0.08399721114999718
          total_loss: 0.8300315300623576
          vf_explained_var: 0.5171217918395996
          vf_loss: 0.9205105218622419
    num_agent_steps_sampled: 1690000
    num_agent_steps_trained: 1690000
    num_steps_sampled: 1690000
    num_steps_trained: 1690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1690,43430.6,1690000,-25.823,-18.7,-50.5,258.23




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1691000
  custom_metrics: {}
  date: 2021-10-22_07-50-32
  done: false
  episode_len_mean: 259.05
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.905000000000097
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5253
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1351623171483078e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6480962736739053
          entropy_coeff: 0.009999999999999998
          kl: 0.02672377478643442
          policy_loss: 0.009205671317047543
          total_loss: 1.100162159734302
          vf_explained_var: 0.3300127685070038
          vf_loss: 1.0974374526076847
    num_agent_steps_sampled: 1691000
    num_agent_steps_trained: 1691000
    num_steps_sampled: 1691000
    num_steps_trained: 169100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1691,43475.3,1691000,-25.905,-18.7,-50.5,259.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1692000
  custom_metrics: {}
  date: 2021-10-22_07-50-59
  done: false
  episode_len_mean: 260.13
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.0130000000001
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5257
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.702743475722462e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7968514813317193
          entropy_coeff: 0.009999999999999998
          kl: 0.08601833698123372
          policy_loss: 0.09001559134986666
          total_loss: 0.8054396569728851
          vf_explained_var: 0.7025483846664429
          vf_loss: 0.7233925832642449
    num_agent_steps_sampled: 1692000
    num_agent_steps_trained: 1692000
    num_steps_sampled: 1692000
    num_steps_trained: 1692000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1692,43502.5,1692000,-26.013,-18.7,-50.5,260.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1693000
  custom_metrics: {}
  date: 2021-10-22_07-51-26
  done: false
  episode_len_mean: 261.71
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.1710000000001
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5260
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5541152135836923e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8113097025288476
          entropy_coeff: 0.009999999999999998
          kl: 0.017647496990590934
          policy_loss: -0.0183738407989343
          total_loss: 0.7038701073990927
          vf_explained_var: 0.7309154868125916
          vf_loss: 0.7303570508956909
    num_agent_steps_sampled: 1693000
    num_agent_steps_trained: 1693000
    num_steps_sampled: 1693000
    num_steps_trained: 1693000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1693,43528.9,1693000,-26.171,-18.7,-50.5,261.71


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1694000
  custom_metrics: {}
  date: 2021-10-22_07-51-51
  done: false
  episode_len_mean: 263.78
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.378000000000103
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5264
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5541152135836923e-57
          cur_lr: 5.000000000000001e-05
          entropy: 1.0226773202419281
          entropy_coeff: 0.009999999999999998
          kl: 0.06093313937830026
          policy_loss: -0.0030103690094417995
          total_loss: 0.5340359712640445
          vf_explained_var: 0.8032806515693665
          vf_loss: 0.5472731211119228
    num_agent_steps_sampled: 1694000
    num_agent_steps_trained: 1694000
    num_steps_sampled: 1694000
    num_steps_trained: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1694,43553.8,1694000,-26.378,-18.7,-50.5,263.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1695000
  custom_metrics: {}
  date: 2021-10-22_07-52-19
  done: false
  episode_len_mean: 264.57
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.457000000000104
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5268
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8311728203755394e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5755754954285092
          entropy_coeff: 0.009999999999999998
          kl: 0.013595742847506722
          policy_loss: -0.05915157871527804
          total_loss: 0.34096898320648406
          vf_explained_var: 0.8298630714416504
          vf_loss: 0.4058763156334559
    num_agent_steps_sampled: 1695000
    num_agent_steps_trained: 1695000
    num_steps_sampled: 1695000
    num_steps_trained: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1695,43581.9,1695000,-26.457,-18.7,-50.5,264.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1696000
  custom_metrics: {}
  date: 2021-10-22_07-52-47
  done: false
  episode_len_mean: 265.31
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.531000000000105
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5271
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8311728203755394e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7155829681290521
          entropy_coeff: 0.009999999999999998
          kl: 0.06821043190301435
          policy_loss: -0.04138463180926111
          total_loss: 0.4054686103430059
          vf_explained_var: 0.8390390276908875
          vf_loss: 0.45400907198588053
    num_agent_steps_sampled: 1696000
    num_agent_steps_trained: 1696000
    num_steps_sampled: 1696000
    num_steps_trained: 1696

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1696,43610.5,1696000,-26.531,-18.7,-50.5,265.31


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1697000
  custom_metrics: {}
  date: 2021-10-22_07-53-15
  done: false
  episode_len_mean: 266.16
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.6160000000001
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5275
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.746759230563307e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.686862380637063
          entropy_coeff: 0.009999999999999998
          kl: 0.010816199921549065
          policy_loss: 0.02827677900592486
          total_loss: 0.7096215526262919
          vf_explained_var: 0.6293875575065613
          vf_loss: 0.6882133907741971
    num_agent_steps_sampled: 1697000
    num_agent_steps_trained: 1697000
    num_steps_sampled: 1697000
    num_steps_trained: 1697000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1697,43637.6,1697000,-26.616,-18.7,-50.5,266.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1698000
  custom_metrics: {}
  date: 2021-10-22_07-53-42
  done: false
  episode_len_mean: 268.56
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.85600000000011
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5279
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.746759230563307e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8336842570039961
          entropy_coeff: 0.009999999999999998
          kl: 0.030054707114859348
          policy_loss: 0.04884205866191122
          total_loss: 0.58082468410333
          vf_explained_var: 0.7718223333358765
          vf_loss: 0.5403194738758935
    num_agent_steps_sampled: 1698000
    num_agent_steps_trained: 1698000
    num_steps_sampled: 1698000
    num_steps_trained: 1698000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1698,43664.7,1698000,-26.856,-18.7,-50.5,268.56




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1699000
  custom_metrics: {}
  date: 2021-10-22_07-54-27
  done: false
  episode_len_mean: 269.58
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.958000000000112
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5283
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.620138845844962e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7971511012978024
          entropy_coeff: 0.009999999999999998
          kl: 0.01295537184888439
          policy_loss: 5.660512381129795e-05
          total_loss: 0.6421184864309099
          vf_explained_var: 0.7230583429336548
          vf_loss: 0.6500334001249737
    num_agent_steps_sampled: 1699000
    num_agent_steps_trained: 1699000
    num_steps_sampled: 1699000
    num_steps_trained: 16990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1699,43710.2,1699000,-26.958,-18.7,-50.5,269.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1700000
  custom_metrics: {}
  date: 2021-10-22_07-54-55
  done: false
  episode_len_mean: 270.77
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.077000000000115
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5286
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.620138845844962e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.8539555172125498
          entropy_coeff: 0.009999999999999998
          kl: 0.012015987160863044
          policy_loss: 0.014865818950865004
          total_loss: 0.645894076095687
          vf_explained_var: 0.5137613415718079
          vf_loss: 0.6395678251981736
    num_agent_steps_sampled: 1700000
    num_agent_steps_trained: 1700000
    num_steps_sampled: 1700000
    num_steps_trained: 170000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1700,43738,1700000,-27.077,-18.7,-50.5,270.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1701000
  custom_metrics: {}
  date: 2021-10-22_07-55-18
  done: false
  episode_len_mean: 273.67
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.367000000000118
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 3
  episodes_total: 5289
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.620138845844962e-57
          cur_lr: 5.000000000000001e-05
          entropy: 1.0379156827926637
          entropy_coeff: 0.009999999999999998
          kl: 0.03703521444684965
          policy_loss: -0.10954344239499834
          total_loss: 0.6273158351580302
          vf_explained_var: 0.6701611280441284
          vf_loss: 0.747238431374232
    num_agent_steps_sampled: 1701000
    num_agent_steps_trained: 1701000
    num_steps_sampled: 1701000
    num_steps_trained: 1701000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1701,43761,1701000,-27.367,-18.7,-50.5,273.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1702000
  custom_metrics: {}
  date: 2021-10-22_07-55-47
  done: false
  episode_len_mean: 274.93
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.49300000000012
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5293
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2930208268767446e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6449415942033132
          entropy_coeff: 0.009999999999999998
          kl: 0.015268744171573366
          policy_loss: -0.024164964424239266
          total_loss: 0.7565584007236693
          vf_explained_var: 0.5211116075515747
          vf_loss: 0.7871727791097429
    num_agent_steps_sampled: 1702000
    num_agent_steps_trained: 1702000
    num_steps_sampled: 1702000
    num_steps_trained: 1702

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1702,43790.3,1702000,-27.493,-21.5,-50.5,274.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1703000
  custom_metrics: {}
  date: 2021-10-22_07-56-16
  done: false
  episode_len_mean: 276.53
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.653000000000123
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5297
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2930208268767446e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5892394092347887
          entropy_coeff: 0.009999999999999998
          kl: 0.009726791026955084
          policy_loss: 0.02199314832687378
          total_loss: 0.7422642111778259
          vf_explained_var: 0.5615414977073669
          vf_loss: 0.7261634641223483
    num_agent_steps_sampled: 1703000
    num_agent_steps_trained: 1703000
    num_steps_sampled: 1703000
    num_steps_trained: 17030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1703,43818.8,1703000,-27.653,-21.5,-50.5,276.53


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1704000
  custom_metrics: {}
  date: 2021-10-22_07-56-44
  done: false
  episode_len_mean: 278.46
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.846000000000117
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5301
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2930208268767446e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6541758378346761
          entropy_coeff: 0.009999999999999998
          kl: 0.012729476726670085
          policy_loss: -0.03615603314505683
          total_loss: 0.9024573657247755
          vf_explained_var: 0.40345263481140137
          vf_loss: 0.9451551616191864
    num_agent_steps_sampled: 1704000
    num_agent_steps_trained: 1704000
    num_steps_sampled: 1704000
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1704,43847,1704000,-27.846,-21.5,-50.5,278.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1705000
  custom_metrics: {}
  date: 2021-10-22_07-57-12
  done: false
  episode_len_mean: 279.98
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.998000000000125
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5305
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2930208268767446e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6112755212518904
          entropy_coeff: 0.009999999999999998
          kl: 0.006797255683226133
          policy_loss: 0.022314262928234206
          total_loss: 1.1244093550576104
          vf_explained_var: 0.21967031061649323
          vf_loss: 1.1082078417142232
    num_agent_steps_sampled: 1705000
    num_agent_steps_trained: 1705000
    num_steps_sampled: 1705000
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1705,43875.3,1705000,-27.998,-21.5,-50.5,279.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1706000
  custom_metrics: {}
  date: 2021-10-22_07-57-42
  done: false
  episode_len_mean: 281.47
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.14700000000013
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5309
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2930208268767446e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.625126639339659
          entropy_coeff: 0.009999999999999998
          kl: 0.003938352492134851
          policy_loss: 0.03161217512355911
          total_loss: 1.2604551023907131
          vf_explained_var: 0.12192487716674805
          vf_loss: 1.2350941949420504
    num_agent_steps_sampled: 1706000
    num_agent_steps_trained: 1706000
    num_steps_sampled: 1706000
    num_steps_trained: 170600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1706,43905.1,1706000,-28.147,-21.5,-50.5,281.47




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1707000
  custom_metrics: {}
  date: 2021-10-22_07-58-29
  done: false
  episode_len_mean: 282.6
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.260000000000133
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5313
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.465104134383723e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.6169176512294345
          entropy_coeff: 0.009999999999999998
          kl: 0.008463157562727652
          policy_loss: 0.023875034434927833
          total_loss: 1.1583772553337945
          vf_explained_var: 0.26303237676620483
          vf_loss: 1.1406714035405052
    num_agent_steps_sampled: 1707000
    num_agent_steps_trained: 1707000
    num_steps_sampled: 1707000
    num_steps_trained: 17070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1707,43951.4,1707000,-28.26,-21.6,-50.5,282.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1708000
  custom_metrics: {}
  date: 2021-10-22_07-58-56
  done: false
  episode_len_mean: 283.69
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.36900000000013
  episode_reward_min: -50.50000000000045
  episodes_this_iter: 4
  episodes_total: 5317
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.465104134383723e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7091090745396085
          entropy_coeff: 0.009999999999999998
          kl: 0.022516581282740193
          policy_loss: 0.0386122298737367
          total_loss: 1.1507555961608886
          vf_explained_var: 0.26252156496047974
          vf_loss: 1.1192344579431746
    num_agent_steps_sampled: 1708000
    num_agent_steps_trained: 1708000
    num_steps_sampled: 1708000
    num_steps_trained: 1708000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1708,43978.7,1708000,-28.369,-22,-50.5,283.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1709000
  custom_metrics: {}
  date: 2021-10-22_07-59-25
  done: false
  episode_len_mean: 280.33
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.03300000000013
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 4
  episodes_total: 5321
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.697656201575583e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.7111885335710314
          entropy_coeff: 0.009999999999999998
          kl: 0.008696836898399044
          policy_loss: 0.019094054566489327
          total_loss: 1.2677097227838305
          vf_explained_var: 0.19552721083164215
          vf_loss: 1.2557275454203287
    num_agent_steps_sampled: 1709000
    num_agent_steps_trained: 1709000
    num_steps_sampled: 1709000
    num_steps_trained: 170900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1709,44007.7,1709000,-28.033,-22,-46.9,280.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1710000
  custom_metrics: {}
  date: 2021-10-22_07-59-52
  done: false
  episode_len_mean: 277.58
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.758000000000123
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 3
  episodes_total: 5324
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.697656201575583e-57
          cur_lr: 5.000000000000001e-05
          entropy: 1.0253599305947623
          entropy_coeff: 0.009999999999999998
          kl: 0.05423364403260204
          policy_loss: -0.02511776586373647
          total_loss: 0.9258528199460772
          vf_explained_var: 0.08773678541183472
          vf_loss: 0.9612241975135274
    num_agent_steps_sampled: 1710000
    num_agent_steps_trained: 1710000
    num_steps_sampled: 1710000
    num_steps_trained: 171000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1710,44034.8,1710000,-27.758,-22,-46.9,277.58


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1711000
  custom_metrics: {}
  date: 2021-10-22_08-00-21
  done: false
  episode_len_mean: 274.46
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.446000000000122
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 4
  episodes_total: 5328
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.454648430236338e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7383489396837023
          entropy_coeff: 0.009999999999999998
          kl: 0.020608345501060575
          policy_loss: -0.016989973187446595
          total_loss: 0.9120486378669739
          vf_explained_var: 0.4575392007827759
          vf_loss: 0.9364221059613758
    num_agent_steps_sampled: 1711000
    num_agent_steps_trained: 1711000
    num_steps_sampled: 1711000
    num_steps_trained: 17110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1711,44063.8,1711000,-27.446,-22,-46.9,274.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1712000
  custom_metrics: {}
  date: 2021-10-22_08-00-50
  done: false
  episode_len_mean: 268.96
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.896000000000114
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5332
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.181972645354506e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.8837468114164141
          entropy_coeff: 0.009999999999999998
          kl: 0.013249171746655799
          policy_loss: 0.0032387290149927138
          total_loss: 0.729353172911538
          vf_explained_var: 0.6187520623207092
          vf_loss: 0.7349519073963166
    num_agent_steps_sampled: 1712000
    num_agent_steps_trained: 1712000
    num_steps_sampled: 1712000
    num_steps_trained: 1712

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1712,44092.4,1712000,-26.896,-22,-41.1,268.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1713000
  custom_metrics: {}
  date: 2021-10-22_08-01-19
  done: false
  episode_len_mean: 267.49
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.749000000000105
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5336
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.181972645354506e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.8054318891631232
          entropy_coeff: 0.009999999999999998
          kl: 0.03920311753036066
          policy_loss: 0.01622504633333948
          total_loss: 1.0251267181502448
          vf_explained_var: 0.37758705019950867
          vf_loss: 1.0169560141033596
    num_agent_steps_sampled: 1713000
    num_agent_steps_trained: 1713000
    num_steps_sampled: 1713000
    num_steps_trained: 17130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1713,44122,1713000,-26.749,-22,-41.1,267.49




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1714000
  custom_metrics: {}
  date: 2021-10-22_08-02-04
  done: false
  episode_len_mean: 265.02
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.50200000000011
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5340
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7347181492381626
          entropy_coeff: 0.009999999999999998
          kl: 0.014708325854003432
          policy_loss: 0.007413528362909953
          total_loss: 0.8046817402044932
          vf_explained_var: 0.3418343961238861
          vf_loss: 0.8046153883139292
    num_agent_steps_sampled: 1714000
    num_agent_steps_trained: 1714000
    num_steps_sampled: 1714000
    num_steps_trained: 1714

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1714,44167.1,1714000,-26.502,-22,-41.1,265.02


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1715000
  custom_metrics: {}
  date: 2021-10-22_08-02-32
  done: false
  episode_len_mean: 264.5
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.450000000000113
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5344
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5602914859851201
          entropy_coeff: 0.009999999999999998
          kl: 0.008845357719900448
          policy_loss: -0.001269793634613355
          total_loss: 0.8333278126186795
          vf_explained_var: 0.4858875274658203
          vf_loss: 0.8402005208863152
    num_agent_steps_sampled: 1715000
    num_agent_steps_trained: 1715000
    num_steps_sampled: 1715000
    num_steps_trained: 171

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1715,44194.4,1715000,-26.45,-22,-41.1,264.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1716000
  custom_metrics: {}
  date: 2021-10-22_08-03-01
  done: false
  episode_len_mean: 264.51
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.451000000000104
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5348
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6619070500135422
          entropy_coeff: 0.009999999999999998
          kl: 0.01392321803310147
          policy_loss: -0.01270454583896531
          total_loss: 0.7101664897468355
          vf_explained_var: 0.5491304993629456
          vf_loss: 0.7294901096158557
    num_agent_steps_sampled: 1716000
    num_agent_steps_trained: 1716000
    num_steps_sampled: 1716000
    num_steps_trained: 1716

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1716,44223.6,1716000,-26.451,-22,-41.1,264.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1717000
  custom_metrics: {}
  date: 2021-10-22_08-03-24
  done: false
  episode_len_mean: 265.91
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.59100000000011
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 3
  episodes_total: 5351
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6237250139315923
          entropy_coeff: 0.009999999999999998
          kl: 0.008071927735325661
          policy_loss: 0.07019028200043573
          total_loss: 0.6883660164144304
          vf_explained_var: 0.5996257066726685
          vf_loss: 0.6244129788544442
    num_agent_steps_sampled: 1717000
    num_agent_steps_trained: 1717000
    num_steps_sampled: 1717000
    num_steps_trained: 17170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1717,44246.5,1717000,-26.591,-22,-41.1,265.91


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1718000
  custom_metrics: {}
  date: 2021-10-22_08-03-53
  done: false
  episode_len_mean: 265.8
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.58000000000011
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5355
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7002672248416477
          entropy_coeff: 0.009999999999999998
          kl: 0.005135058460243474
          policy_loss: 0.006862988985247082
          total_loss: 0.9333500742912293
          vf_explained_var: 0.37765562534332275
          vf_loss: 0.933489759100808
    num_agent_steps_sampled: 1718000
    num_agent_steps_trained: 1718000
    num_steps_sampled: 1718000
    num_steps_trained: 17180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1718,44275.2,1718000,-26.58,-22,-41.1,265.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1719000
  custom_metrics: {}
  date: 2021-10-22_08-04-22
  done: false
  episode_len_mean: 264.78
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.478000000000108
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5359
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6917362650235493
          entropy_coeff: 0.009999999999999998
          kl: 0.011020432053774383
          policy_loss: -0.005834885686635971
          total_loss: 1.0882152087158627
          vf_explained_var: 0.23074491322040558
          vf_loss: 1.1009674509366354
    num_agent_steps_sampled: 1719000
    num_agent_steps_trained: 1719000
    num_steps_sampled: 1719000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1719,44304.6,1719000,-26.478,-22,-41.1,264.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1720000
  custom_metrics: {}
  date: 2021-10-22_08-04-50
  done: false
  episode_len_mean: 263.25
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.3250000000001
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5363
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2729589680317594e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.8769094374444749
          entropy_coeff: 0.009999999999999998
          kl: 0.02387783292600842
          policy_loss: -0.011661702518661816
          total_loss: 1.0201762821939258
          vf_explained_var: 0.3195936381816864
          vf_loss: 1.0406070801946852
    num_agent_steps_sampled: 1720000
    num_agent_steps_trained: 1720000
    num_steps_sampled: 1720000
    num_steps_trained: 17200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1720,44333.1,1720000,-26.325,-22,-41.1,263.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1721000
  custom_metrics: {}
  date: 2021-10-22_08-05-19
  done: false
  episode_len_mean: 262.94
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -26.294000000000107
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 3
  episodes_total: 5366
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.909438452047637e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7624844478236305
          entropy_coeff: 0.009999999999999998
          kl: 0.03345602053016196
          policy_loss: -0.07935490757226944
          total_loss: 0.8911701109674242
          vf_explained_var: 0.3419575095176697
          vf_loss: 0.9781498597727881
    num_agent_steps_sampled: 1721000
    num_agent_steps_trained: 1721000
    num_steps_sampled: 1721000
    num_steps_trained: 17210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1721,44361.8,1721000,-26.294,-22,-41.1,262.94




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1722000
  custom_metrics: {}
  date: 2021-10-22_08-06-07
  done: false
  episode_len_mean: 262.38
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.238000000000103
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 5
  episodes_total: 5371
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.364157678071458e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6130260427792867
          entropy_coeff: 0.009999999999999998
          kl: 0.0080702642778571
          policy_loss: -0.016352138999435637
          total_loss: 1.3932922111617194
          vf_explained_var: 0.275634765625
          vf_loss: 1.4157745997111002
    num_agent_steps_sampled: 1722000
    num_agent_steps_trained: 1722000
    num_steps_sampled: 1722000
    num_steps_trained: 1722000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1722,44409.3,1722000,-26.238,-21.6,-41.1,262.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1723000
  custom_metrics: {}
  date: 2021-10-22_08-06-33
  done: false
  episode_len_mean: 262.77
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.277000000000104
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 3
  episodes_total: 5374
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.364157678071458e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7327784776687623
          entropy_coeff: 0.009999999999999998
          kl: 0.023957639668149493
          policy_loss: 0.008145201288991504
          total_loss: 0.9432480159733031
          vf_explained_var: 0.22493936121463776
          vf_loss: 0.9424306008550856
    num_agent_steps_sampled: 1723000
    num_agent_steps_trained: 1723000
    num_steps_sampled: 1723000
    num_steps_trained: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1723,44435.9,1723000,-26.277,-21.6,-41.1,262.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1724000
  custom_metrics: {}
  date: 2021-10-22_08-07-01
  done: false
  episode_len_mean: 262.62
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.262000000000103
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5378
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1046236517107184e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8046128882302178
          entropy_coeff: 0.009999999999999998
          kl: 0.025082375765616564
          policy_loss: 0.02200883055726687
          total_loss: 1.2330963916248745
          vf_explained_var: 0.26168709993362427
          vf_loss: 1.2191336856948005
    num_agent_steps_sampled: 1724000
    num_agent_steps_trained: 1724000
    num_steps_sampled: 1724000
    num_steps_trained: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1724,44463.5,1724000,-26.262,-21.6,-41.1,262.62


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1725000
  custom_metrics: {}
  date: 2021-10-22_08-07-28
  done: false
  episode_len_mean: 262.51
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.2510000000001
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 5382
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.656935477566078e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.9441028873125712
          entropy_coeff: 0.009999999999999998
          kl: 0.012298557788234552
          policy_loss: 0.025161400271786583
          total_loss: 1.3518407569991218
          vf_explained_var: 0.18711265921592712
          vf_loss: 1.3361203816201952
    num_agent_steps_sampled: 1725000
    num_agent_steps_trained: 1725000
    num_steps_sampled: 1725000
    num_steps_trained: 17250

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1725,44490.8,1725000,-26.251,-21.6,-41.1,262.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1726000
  custom_metrics: {}
  date: 2021-10-22_08-07-54
  done: false
  episode_len_mean: 262.97
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.297000000000107
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 3
  episodes_total: 5385
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.656935477566078e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.241597851117452
          entropy_coeff: 0.009999999999999998
          kl: 0.037163039453030754
          policy_loss: 0.04548008375697666
          total_loss: 0.9044877307282554
          vf_explained_var: 0.3952898681163788
          vf_loss: 0.8714236418406168
    num_agent_steps_sampled: 1726000
    num_agent_steps_trained: 1726000
    num_steps_sampled: 1726000
    num_steps_trained: 172600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1726,44516.3,1726000,-26.297,-21.6,-41.1,262.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1727000
  custom_metrics: {}
  date: 2021-10-22_08-08-21
  done: false
  episode_len_mean: 261.96
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.196000000000105
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5389
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4854032163491166e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.0371074391735924
          entropy_coeff: 0.009999999999999998
          kl: 0.015508828804924709
          policy_loss: 0.001920493526591195
          total_loss: 1.361412689420912
          vf_explained_var: 0.28250983357429504
          vf_loss: 1.3698632624414233
    num_agent_steps_sampled: 1727000
    num_agent_steps_trained: 1727000
    num_steps_sampled: 1727000
    num_steps_trained: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1727,44543,1727000,-26.196,-21.6,-34.1,261.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1728000
  custom_metrics: {}
  date: 2021-10-22_08-08-47
  done: false
  episode_len_mean: 262.4
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.240000000000094
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 5392
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4854032163491166e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.0870530313915676
          entropy_coeff: 0.009999999999999998
          kl: 0.03257179527005449
          policy_loss: 0.03700803766647975
          total_loss: 0.9673885345458985
          vf_explained_var: 0.3491324186325073
          vf_loss: 0.9412510368559096
    num_agent_steps_sampled: 1728000
    num_agent_steps_trained: 1728000
    num_steps_sampled: 1728000
    num_steps_trained: 172800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1728,44569.7,1728000,-26.24,-21.6,-34.1,262.4


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1729000
  custom_metrics: {}
  date: 2021-10-22_08-09-17
  done: false
  episode_len_mean: 262.43
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.243000000000105
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5396
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.728104824523676e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.765212251080407
          entropy_coeff: 0.009999999999999998
          kl: 0.05007459197633024
          policy_loss: -0.019939846669634182
          total_loss: 1.0421340737077924
          vf_explained_var: 0.45241373777389526
          vf_loss: 1.0697260479132333
    num_agent_steps_sampled: 1729000
    num_agent_steps_trained: 1729000
    num_steps_sampled: 1729000
    num_steps_trained: 1729

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1729,44599.1,1729000,-26.243,-21.6,-34.1,262.43




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1730000
  custom_metrics: {}
  date: 2021-10-22_08-10-01
  done: false
  episode_len_mean: 262.18
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.218000000000103
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5400
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5921572367855125e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.023781492975023
          entropy_coeff: 0.009999999999999998
          kl: 0.02498384261214189
          policy_loss: 0.007072942165864839
          total_loss: 1.035640913910336
          vf_explained_var: 0.4681766927242279
          vf_loss: 1.0388057933913337
    num_agent_steps_sampled: 1730000
    num_agent_steps_trained: 1730000
    num_steps_sampled: 1730000
    num_steps_trained: 173000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1730,44642.9,1730000,-26.218,-21.6,-34.1,262.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1731000
  custom_metrics: {}
  date: 2021-10-22_08-10-29
  done: false
  episode_len_mean: 263.18
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.3180000000001
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5404
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.388235855178269e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.045235206021203
          entropy_coeff: 0.009999999999999998
          kl: 0.027157145157717603
          policy_loss: -0.018261991317073505
          total_loss: 0.9952809095382691
          vf_explained_var: 0.5182459950447083
          vf_loss: 1.0239952544371287
    num_agent_steps_sampled: 1731000
    num_agent_steps_trained: 1731000
    num_steps_sampled: 1731000
    num_steps_trained: 173100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1731,44671.3,1731000,-26.318,-21.6,-34.1,263.18


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1732000
  custom_metrics: {}
  date: 2021-10-22_08-10-57
  done: false
  episode_len_mean: 263.57
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.357000000000102
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 5407
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2582353782767401e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.941681072447035
          entropy_coeff: 0.009999999999999998
          kl: 0.010059694818436545
          policy_loss: -0.019734276396532854
          total_loss: 0.8187147796154022
          vf_explained_var: 0.3564506471157074
          vf_loss: 0.8478658705949783
    num_agent_steps_sampled: 1732000
    num_agent_steps_trained: 1732000
    num_steps_sampled: 1732000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1732,44699.5,1732000,-26.357,-21.6,-34.1,263.57


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1733000
  custom_metrics: {}
  date: 2021-10-22_08-11-24
  done: false
  episode_len_mean: 264.94
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.494000000000106
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5411
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2582353782767401e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9361252592669593
          entropy_coeff: 0.009999999999999998
          kl: 0.01620664780790043
          policy_loss: 0.048797068289584584
          total_loss: 1.1437440991401673
          vf_explained_var: 0.2465599626302719
          vf_loss: 1.1043082853158315
    num_agent_steps_sampled: 1733000
    num_agent_steps_trained: 1733000
    num_steps_sampled: 1733000
    num_steps_trained: 1733

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1733,44726,1733000,-26.494,-21.6,-34.1,264.94


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1734000
  custom_metrics: {}
  date: 2021-10-22_08-11-51
  done: false
  episode_len_mean: 265.25
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.525000000000105
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5415
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2582353782767401e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9366158213880327
          entropy_coeff: 0.009999999999999998
          kl: 0.023518681761804473
          policy_loss: 0.02842353507876396
          total_loss: 0.9817989461951786
          vf_explained_var: 0.27398258447647095
          vf_loss: 0.9627415749761793
    num_agent_steps_sampled: 1734000
    num_agent_steps_trained: 1734000
    num_steps_sampled: 1734000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1734,44753.4,1734000,-26.525,-21.6,-34.1,265.25


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1735000
  custom_metrics: {}
  date: 2021-10-22_08-12-20
  done: false
  episode_len_mean: 265.34
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.534000000000106
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 5418
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8772235949834187
          entropy_coeff: 0.009999999999999998
          kl: 0.012977165323501912
          policy_loss: -0.08734925554858314
          total_loss: 0.9036541289753384
          vf_explained_var: 0.3682674765586853
          vf_loss: 0.9997756163279216
    num_agent_steps_sampled: 1735000
    num_agent_steps_trained: 1735000
    num_steps_sampled: 1735000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1735,44782.1,1735000,-26.534,-21.6,-34.1,265.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1736000
  custom_metrics: {}
  date: 2021-10-22_08-12-46
  done: false
  episode_len_mean: 265.87
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.587000000000103
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5422
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.827912473016315
          entropy_coeff: 0.009999999999999998
          kl: 0.010108986468804758
          policy_loss: -0.013706124325593313
          total_loss: 1.2433212962415483
          vf_explained_var: 0.2896222472190857
          vf_loss: 1.2653065535757277
    num_agent_steps_sampled: 1736000
    num_agent_steps_trained: 1736000
    num_steps_sampled: 1736000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1736,44808.8,1736000,-26.587,-21.6,-34.1,265.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1737000
  custom_metrics: {}
  date: 2021-10-22_08-13-13
  done: false
  episode_len_mean: 266.04
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.604000000000106
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5426
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8015660451518165
          entropy_coeff: 0.009999999999999998
          kl: 0.00885211322497273
          policy_loss: 0.0017807665798399183
          total_loss: 1.3628087904718187
          vf_explained_var: 0.25520166754722595
          vf_loss: 1.369043673409356
    num_agent_steps_sampled: 1737000
    num_agent_steps_trained: 1737000
    num_steps_sampled: 1737000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1737,44835.4,1737000,-26.604,-21.6,-34.1,266.04




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1738000
  custom_metrics: {}
  date: 2021-10-22_08-13-58
  done: false
  episode_len_mean: 266.68
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.66800000000011
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5430
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.7717750072479248
          entropy_coeff: 0.009999999999999998
          kl: 0.010558979163272397
          policy_loss: -0.002663828598128425
          total_loss: 1.290157934029897
          vf_explained_var: 0.31265807151794434
          vf_loss: 1.3005395134290059
    num_agent_steps_sampled: 1738000
    num_agent_steps_trained: 1738000
    num_steps_sampled: 1738000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1738,44880.3,1738000,-26.668,-21.6,-34.1,266.68


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1739000
  custom_metrics: {}
  date: 2021-10-22_08-14-27
  done: false
  episode_len_mean: 267.01
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.701000000000114
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 5433
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6147861606544919
          entropy_coeff: 0.009999999999999998
          kl: 0.007538427886431186
          policy_loss: -0.00214673669801818
          total_loss: 0.7873327142662472
          vf_explained_var: 0.5455177426338196
          vf_loss: 0.7956273125277625
    num_agent_steps_sampled: 1739000
    num_agent_steps_trained: 1739000
    num_steps_sampled: 1739000
    num_steps_trained: 173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1739,44909.4,1739000,-26.701,-21.6,-34.1,267.01


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1740000
  custom_metrics: {}
  date: 2021-10-22_08-14-56
  done: false
  episode_len_mean: 267.21
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.721000000000114
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5437
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6082380400763617
          entropy_coeff: 0.009999999999999998
          kl: 0.005181617527231058
          policy_loss: -0.01988017476267285
          total_loss: 1.2749250027868482
          vf_explained_var: 0.28980112075805664
          vf_loss: 1.3008875528971353
    num_agent_steps_sampled: 1740000
    num_agent_steps_trained: 1740000
    num_steps_sampled: 1740000
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1740,44938.3,1740000,-26.721,-21.6,-34.1,267.21


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1741000
  custom_metrics: {}
  date: 2021-10-22_08-15-25
  done: false
  episode_len_mean: 267.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.74400000000011
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5441
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8873530674151114e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5894263803958892
          entropy_coeff: 0.009999999999999998
          kl: 0.021863462475571655
          policy_loss: 0.003680347568458981
          total_loss: 1.0522848427295686
          vf_explained_var: 0.43175938725471497
          vf_loss: 1.0544987559318542
    num_agent_steps_sampled: 1741000
    num_agent_steps_trained: 1741000
    num_steps_sampled: 1741000
    num_steps_trained: 174

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1741,44967.2,1741000,-26.744,-21.6,-34.1,267.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1742000
  custom_metrics: {}
  date: 2021-10-22_08-15-54
  done: false
  episode_len_mean: 266.42
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.642000000000106
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5445
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.831029601122665e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.4714487158589893
          entropy_coeff: 0.009999999999999998
          kl: 0.012869881030965126
          policy_loss: 0.011908869341843658
          total_loss: 1.1643193317784204
          vf_explained_var: 0.40697482228279114
          vf_loss: 1.1571249498261347
    num_agent_steps_sampled: 1742000
    num_agent_steps_trained: 1742000
    num_steps_sampled: 1742000
    num_steps_trained: 174

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1742,44996.4,1742000,-26.642,-21.6,-34.1,266.42


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1743000
  custom_metrics: {}
  date: 2021-10-22_08-16-23
  done: false
  episode_len_mean: 265.98
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.598000000000113
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 5449
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.831029601122665e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.49960129227903155
          entropy_coeff: 0.009999999999999998
          kl: 0.03250508478692511
          policy_loss: -0.004885483988457256
          total_loss: 0.9429860625002119
          vf_explained_var: 0.4278111457824707
          vf_loss: 0.9528675615787506
    num_agent_steps_sampled: 1743000
    num_agent_steps_trained: 1743000
    num_steps_sampled: 1743000
    num_steps_trained: 174

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1743,45025.3,1743000,-26.598,-21.6,-34.1,265.98


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1744000
  custom_metrics: {}
  date: 2021-10-22_08-16-52
  done: false
  episode_len_mean: 263.99
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.399000000000104
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5453
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.246544401683999e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6023620314068264
          entropy_coeff: 0.009999999999999998
          kl: 0.03048437972122239
          policy_loss: -0.0024017570747269525
          total_loss: 1.1030490371916029
          vf_explained_var: 0.35025233030319214
          vf_loss: 1.1114744186401366
    num_agent_steps_sampled: 1744000
    num_agent_steps_trained: 1744000
    num_steps_sampled: 1744000
    num_steps_trained: 174

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1744,45054.2,1744000,-26.399,-21.6,-32.3,263.99


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1745000
  custom_metrics: {}
  date: 2021-10-22_08-17-20
  done: false
  episode_len_mean: 264.42
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.442000000000103
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5457
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.369816602525996e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5198286331362194
          entropy_coeff: 0.009999999999999998
          kl: 0.012270847705836625
          policy_loss: 0.03424220581849416
          total_loss: 0.9737813406520419
          vf_explained_var: 0.42618328332901
          vf_loss: 0.9447374317381118
    num_agent_steps_sampled: 1745000
    num_agent_steps_trained: 1745000
    num_steps_sampled: 1745000
    num_steps_trained: 1745000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1745,45081.8,1745000,-26.442,-21.6,-32.3,264.42




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1746000
  custom_metrics: {}
  date: 2021-10-22_08-18-06
  done: false
  episode_len_mean: 264.28
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.4280000000001
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5461
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.369816602525996e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5873945948150423
          entropy_coeff: 0.009999999999999998
          kl: 0.012655927130654415
          policy_loss: 0.02289808529118697
          total_loss: 1.2356724017196232
          vf_explained_var: 0.3127690553665161
          vf_loss: 1.218648260169559
    num_agent_steps_sampled: 1746000
    num_agent_steps_trained: 1746000
    num_steps_sampled: 1746000
    num_steps_trained: 1746000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1746,45128.3,1746000,-26.428,-21.6,-32.3,264.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1747000
  custom_metrics: {}
  date: 2021-10-22_08-18-36
  done: false
  episode_len_mean: 264.28
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.428000000000107
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5465
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.369816602525996e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6050800840059917
          entropy_coeff: 0.009999999999999998
          kl: 0.0131643881492841
          policy_loss: 0.0028372501333554585
          total_loss: 1.1564230455292597
          vf_explained_var: 0.3029489517211914
          vf_loss: 1.1596366001500025
    num_agent_steps_sampled: 1747000
    num_agent_steps_trained: 1747000
    num_steps_sampled: 1747000
    num_steps_trained: 174700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1747,45157.9,1747000,-26.428,-21.6,-32.3,264.28


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1748000
  custom_metrics: {}
  date: 2021-10-22_08-19-04
  done: false
  episode_len_mean: 263.72
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.372000000000103
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5469
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.369816602525996e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6058323038948907
          entropy_coeff: 0.009999999999999998
          kl: 0.003273518285050159
          policy_loss: 0.04025183750523461
          total_loss: 1.2263277610143026
          vf_explained_var: 0.3176766633987427
          vf_loss: 1.1921342545085483
    num_agent_steps_sampled: 1748000
    num_agent_steps_trained: 1748000
    num_steps_sampled: 1748000
    num_steps_trained: 174800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1748,45186.4,1748000,-26.372,-21.6,-32.3,263.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1749000
  custom_metrics: {}
  date: 2021-10-22_08-19-34
  done: false
  episode_len_mean: 263.23
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.323000000000103
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5473
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.184908301262998e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5837072471777598
          entropy_coeff: 0.009999999999999998
          kl: 0.020539216659861064
          policy_loss: 0.026628848496410583
          total_loss: 1.194189691543579
          vf_explained_var: 0.317348837852478
          vf_loss: 1.1733979092703926
    num_agent_steps_sampled: 1749000
    num_agent_steps_trained: 1749000
    num_steps_sampled: 1749000
    num_steps_trained: 1749000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1749,45215.5,1749000,-26.323,-23.3,-32.3,263.23


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1750000
  custom_metrics: {}
  date: 2021-10-22_08-20-02
  done: false
  episode_len_mean: 263.27
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.3270000000001
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5477
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.777362451894498e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6501058194372389
          entropy_coeff: 0.009999999999999998
          kl: 0.0047775862837009225
          policy_loss: 0.04947600861390432
          total_loss: 1.0800700730747648
          vf_explained_var: 0.31063610315322876
          vf_loss: 1.0370951294898987
    num_agent_steps_sampled: 1750000
    num_agent_steps_trained: 1750000
    num_steps_sampled: 1750000
    num_steps_trained: 1750000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1750,45244.1,1750000,-26.327,-23.3,-32.3,263.27


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1751000
  custom_metrics: {}
  date: 2021-10-22_08-20-31
  done: false
  episode_len_mean: 262.45
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.2450000000001
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5481
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.388681225947249e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.629681224293179
          entropy_coeff: 0.009999999999999998
          kl: 0.00329688327094178
          policy_loss: 0.019499983886877695
          total_loss: 1.2473599248462253
          vf_explained_var: 0.25923100113868713
          vf_loss: 1.23415676885181
    num_agent_steps_sampled: 1751000
    num_agent_steps_trained: 1751000
    num_steps_sampled: 1751000
    num_steps_trained: 1751000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1751,45272.7,1751000,-26.245,-23.3,-32.3,262.45


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1752000
  custom_metrics: {}
  date: 2021-10-22_08-21-00
  done: false
  episode_len_mean: 261.43
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.143000000000107
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 5485
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1943406129736246e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.7789915276898278
          entropy_coeff: 0.009999999999999998
          kl: 0.044493117715490235
          policy_loss: -0.004702987687455283
          total_loss: 1.035394083791309
          vf_explained_var: 0.39650759100914
          vf_loss: 1.0478869835535685
    num_agent_steps_sampled: 1752000
    num_agent_steps_trained: 1752000
    num_steps_sampled: 1752000
    num_steps_trained: 1752000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1752,45301.8,1752000,-26.143,-23.3,-32.3,261.43


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1753000
  custom_metrics: {}
  date: 2021-10-22_08-21-25
  done: false
  episode_len_mean: 261.67
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.1670000000001
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 3
  episodes_total: 5488
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7915109194604366e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.1242247270213233
          entropy_coeff: 0.009999999999999998
          kl: 0.028459029302361057
          policy_loss: 0.03721778359678057
          total_loss: 1.0125943875975079
          vf_explained_var: 0.32232874631881714
          vf_loss: 0.986618862549464
    num_agent_steps_sampled: 1753000
    num_agent_steps_trained: 1753000
    num_steps_sampled: 1753000
    num_steps_trained: 1753000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1753,45326.8,1753000,-26.167,-23.3,-32.3,261.67




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1754000
  custom_metrics: {}
  date: 2021-10-22_08-22-10
  done: false
  episode_len_mean: 261.95
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.195000000000103
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5492
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.687266379190654e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.084658365117179
          entropy_coeff: 0.009999999999999998
          kl: 0.021603970432440493
          policy_loss: -0.020749424273769062
          total_loss: 1.188548869556851
          vf_explained_var: 0.4272983968257904
          vf_loss: 1.220144878493415
    num_agent_steps_sampled: 1754000
    num_agent_steps_trained: 1754000
    num_steps_sampled: 1754000
    num_steps_trained: 1754000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1754,45372.2,1754000,-26.195,-23.3,-33.9,261.95


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1755000
  custom_metrics: {}
  date: 2021-10-22_08-22-37
  done: false
  episode_len_mean: 262.66
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.266000000000098
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 3
  episodes_total: 5495
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.030899568785982e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.054185101058748
          entropy_coeff: 0.009999999999999998
          kl: 0.014263650917257564
          policy_loss: 0.041038641995853845
          total_loss: 0.9283359815677007
          vf_explained_var: 0.5167568922042847
          vf_loss: 0.8978391771515211
    num_agent_steps_sampled: 1755000
    num_agent_steps_trained: 1755000
    num_steps_sampled: 1755000
    num_steps_trained: 1755000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1755,45399.2,1755000,-26.266,-23.3,-33.9,262.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1756000
  custom_metrics: {}
  date: 2021-10-22_08-23-06
  done: false
  episode_len_mean: 262.38
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -26.238000000000103
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5499
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.030899568785982e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.7833193196190729
          entropy_coeff: 0.009999999999999998
          kl: 0.014093929149510763
          policy_loss: 0.032849399290151064
          total_loss: 1.1524305158191257
          vf_explained_var: 0.46919140219688416
          vf_loss: 1.127414306667116
    num_agent_steps_sampled: 1756000
    num_agent_steps_trained: 1756000
    num_steps_sampled: 1756000
    num_steps_trained: 175600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1756,45427.4,1756000,-26.238,-23.3,-33.9,262.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1757000
  custom_metrics: {}
  date: 2021-10-22_08-23-35
  done: false
  episode_len_mean: 261.86
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.186000000000103
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5503
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.030899568785982e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8252273705270555
          entropy_coeff: 0.009999999999999998
          kl: 0.02052135756035685
          policy_loss: 0.021370931300852034
          total_loss: 1.2010100550121732
          vf_explained_var: 0.39210939407348633
          vf_loss: 1.1878913905885484
    num_agent_steps_sampled: 1757000
    num_agent_steps_trained: 1757000
    num_steps_sampled: 1757000
    num_steps_trained: 17570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1757,45456.4,1757000,-26.186,-23.4,-33.9,261.86


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1758000
  custom_metrics: {}
  date: 2021-10-22_08-24-02
  done: false
  episode_len_mean: 261.66
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.1660000000001
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5507
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.046349353178974e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.03882973657714
          entropy_coeff: 0.009999999999999998
          kl: 0.025971144104898355
          policy_loss: -0.006428160766760508
          total_loss: 1.0850680788358054
          vf_explained_var: 0.3902190625667572
          vf_loss: 1.1018845001856485
    num_agent_steps_sampled: 1758000
    num_agent_steps_trained: 1758000
    num_steps_sampled: 1758000
    num_steps_trained: 1758000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1758,45484.1,1758000,-26.166,-23.4,-33.9,261.66


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1759000
  custom_metrics: {}
  date: 2021-10-22_08-24-31
  done: false
  episode_len_mean: 261.13
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.113000000000103
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5511
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.06952402976846e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.9307711780071258
          entropy_coeff: 0.009999999999999998
          kl: 0.009294639619308655
          policy_loss: -0.005305407030714883
          total_loss: 1.0220290078057184
          vf_explained_var: 0.4646015465259552
          vf_loss: 1.036642137500975
    num_agent_steps_sampled: 1759000
    num_agent_steps_trained: 1759000
    num_steps_sampled: 1759000
    num_steps_trained: 175900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1759,45513,1759000,-26.113,-23.4,-33.9,261.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1760000
  custom_metrics: {}
  date: 2021-10-22_08-25-00
  done: false
  episode_len_mean: 260.47
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.047000000000097
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 5515
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.06952402976846e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.0020620081159803
          entropy_coeff: 0.009999999999999998
          kl: 0.12347738615041332
          policy_loss: -0.03660838587416543
          total_loss: 0.9952522350682153
          vf_explained_var: 0.46393030881881714
          vf_loss: 1.0418812427255841
    num_agent_steps_sampled: 1760000
    num_agent_steps_trained: 1760000
    num_steps_sampled: 1760000
    num_steps_trained: 176000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1760,45542.2,1760000,-26.047,-23.4,-33.9,260.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1761000
  custom_metrics: {}
  date: 2021-10-22_08-25-17
  done: false
  episode_len_mean: 265.19
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.5190000000001
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 2
  episodes_total: 5517
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.360428604465269e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.6839636405309041
          entropy_coeff: 0.009999999999999998
          kl: 0.01846064948374217
          policy_loss: 0.11102595975001653
          total_loss: 0.5047839689585898
          vf_explained_var: 0.053684383630752563
          vf_loss: 0.40059764890207183
    num_agent_steps_sampled: 1761000
    num_agent_steps_trained: 1761000
    num_steps_sampled: 1761000
    num_steps_trained: 176100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1761,45558.5,1761000,-26.519,-23.4,-76,265.19


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1762000
  custom_metrics: {}
  date: 2021-10-22_08-25-34
  done: false
  episode_len_mean: 268.65
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.8650000000001
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 2
  episodes_total: 5519
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.360428604465269e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.9800314300590092
          entropy_coeff: 0.009999999999999998
          kl: 0.12594444494303075
          policy_loss: 0.07156504740317662
          total_loss: 0.28939654992686376
          vf_explained_var: -0.11922597140073776
          vf_loss: 0.22763181710615754
    num_agent_steps_sampled: 1762000
    num_agent_steps_trained: 1762000
    num_steps_sampled: 1762000
    num_steps_trained: 17620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1762,45575.9,1762000,-26.865,-23.4,-76,268.65




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1763000
  custom_metrics: {}
  date: 2021-10-22_08-26-16
  done: false
  episode_len_mean: 269.92
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -26.992000000000107
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5522
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0406429066979033e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.3944405231210921
          entropy_coeff: 0.009999999999999998
          kl: 0.06217745498232211
          policy_loss: 0.03349673532777363
          total_loss: 0.8676900065607495
          vf_explained_var: 0.06019224226474762
          vf_loss: 0.8481376626425319
    num_agent_steps_sampled: 1763000
    num_agent_steps_trained: 1763000
    num_steps_sampled: 1763000
    num_steps_trained: 17630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1763,45618.2,1763000,-26.992,-23.4,-76,269.92


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1764000
  custom_metrics: {}
  date: 2021-10-22_08-26-40
  done: false
  episode_len_mean: 271.72
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -27.172000000000107
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5525
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0609643600468555e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.3539228465822009
          entropy_coeff: 0.009999999999999998
          kl: 0.053766960893815725
          policy_loss: -0.04206546685761876
          total_loss: 0.7716182106071048
          vf_explained_var: 0.6391671299934387
          vf_loss: 0.8272228958706062
    num_agent_steps_sampled: 1764000
    num_agent_steps_trained: 1764000
    num_steps_sampled: 1764000
    num_steps_trained: 1764

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1764,45641.4,1764000,-27.172,-23.4,-76,271.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1765000
  custom_metrics: {}
  date: 2021-10-22_08-27-06
  done: false
  episode_len_mean: 272.08
  episode_media: {}
  episode_reward_max: -23.400000000000063
  episode_reward_mean: -27.208000000000112
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5528
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.591446540070283e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.2100995514127943
          entropy_coeff: 0.009999999999999998
          kl: 0.016183094381584735
          policy_loss: -0.09229161747627788
          total_loss: 0.8992300238874223
          vf_explained_var: 0.613401472568512
          vf_loss: 1.0036226365301344
    num_agent_steps_sampled: 1765000
    num_agent_steps_trained: 1765000
    num_steps_sampled: 1765000
    num_steps_trained: 176500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1765,45667.9,1765000,-27.208,-23.4,-76,272.08


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1766000
  custom_metrics: {}
  date: 2021-10-22_08-27-34
  done: false
  episode_len_mean: 272.76
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -27.276000000000113
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5532
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.591446540070283e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.1104757534133063
          entropy_coeff: 0.009999999999999998
          kl: 0.040821442538033166
          policy_loss: 0.005086488525072734
          total_loss: 1.1873316162162357
          vf_explained_var: 0.4786330759525299
          vf_loss: 1.1933498859405518
    num_agent_steps_sampled: 1766000
    num_agent_steps_trained: 1766000
    num_steps_sampled: 1766000
    num_steps_trained: 176600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1766,45696,1766000,-27.276,-24,-76,272.76


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1767000
  custom_metrics: {}
  date: 2021-10-22_08-28-02
  done: false
  episode_len_mean: 272.9
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -27.29000000000011
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5536
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.887169810105424e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.0940106173356374
          entropy_coeff: 0.009999999999999998
          kl: 0.06887517766279434
          policy_loss: -0.00954868302990993
          total_loss: 0.9895383430851831
          vf_explained_var: 0.5677967667579651
          vf_loss: 1.0100271191861894
    num_agent_steps_sampled: 1767000
    num_agent_steps_trained: 1767000
    num_steps_sampled: 1767000
    num_steps_trained: 1767000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1767,45723.7,1767000,-27.29,-24,-76,272.9


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1768000
  custom_metrics: {}
  date: 2021-10-22_08-28-30
  done: false
  episode_len_mean: 273.33
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -27.333000000000105
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5540
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0330754715158136e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.044700931178199
          entropy_coeff: 0.009999999999999998
          kl: 0.029113277386016514
          policy_loss: 0.04533413292633163
          total_loss: 1.0767525997426775
          vf_explained_var: 0.4802916944026947
          vf_loss: 1.0418654965029823
    num_agent_steps_sampled: 1768000
    num_agent_steps_trained: 1768000
    num_steps_sampled: 1768000
    num_steps_trained: 1768000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1768,45751.7,1768000,-27.333,-24,-76,273.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1769000
  custom_metrics: {}
  date: 2021-10-22_08-28-54
  done: false
  episode_len_mean: 273.64
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -27.364000000000114
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5543
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5496132072737203e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.9091563291019864
          entropy_coeff: 0.009999999999999998
          kl: 0.03646827255238697
          policy_loss: -0.08312373989158206
          total_loss: 0.9441191342141894
          vf_explained_var: 0.3974241018295288
          vf_loss: 1.0363344377941555
    num_agent_steps_sampled: 1769000
    num_agent_steps_trained: 1769000
    num_steps_sampled: 1769000
    num_steps_trained: 176900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1769,45775.5,1769000,-27.364,-24,-76,273.64


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1770000
  custom_metrics: {}
  date: 2021-10-22_08-29-21
  done: false
  episode_len_mean: 276.17
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -27.61700000000011
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5547
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.32441981091058e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.9010116663244035
          entropy_coeff: 0.009999999999999998
          kl: 0.03507303970751631
          policy_loss: 0.07635637203024494
          total_loss: 0.8157190796401765
          vf_explained_var: 0.4735918641090393
          vf_loss: 0.7483728210131327
    num_agent_steps_sampled: 1770000
    num_agent_steps_trained: 1770000
    num_steps_sampled: 1770000
    num_steps_trained: 1770000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1770,45802.8,1770000,-27.617,-24,-76,276.17




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1771000
  custom_metrics: {}
  date: 2021-10-22_08-30-08
  done: false
  episode_len_mean: 276.06
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.60600000000011
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5551
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.486629716365869e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7986610392729442
          entropy_coeff: 0.009999999999999998
          kl: 0.008868048238460189
          policy_loss: 0.016602005312840142
          total_loss: 1.0034617841243745
          vf_explained_var: 0.4303833842277527
          vf_loss: 0.994846377770106
    num_agent_steps_sampled: 1771000
    num_agent_steps_trained: 1771000
    num_steps_sampled: 1771000
    num_steps_trained: 1771000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1771,45849.2,1771000,-27.606,-22,-76,276.06


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1772000
  custom_metrics: {}
  date: 2021-10-22_08-30-35
  done: false
  episode_len_mean: 276.96
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.69600000000011
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5554
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.486629716365869e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.954062713517083
          entropy_coeff: 0.009999999999999998
          kl: 0.025695954127250707
          policy_loss: 0.010289086567031012
          total_loss: 0.6961555729309717
          vf_explained_var: 0.39415478706359863
          vf_loss: 0.6954070998562707
    num_agent_steps_sampled: 1772000
    num_agent_steps_trained: 1772000
    num_steps_sampled: 1772000
    num_steps_trained: 177200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1772,45876.6,1772000,-27.696,-22,-76,276.96


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1773000
  custom_metrics: {}
  date: 2021-10-22_08-31-03
  done: false
  episode_len_mean: 276.79
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.679000000000116
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5558
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2299445745488055e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7968288739522298
          entropy_coeff: 0.009999999999999998
          kl: 0.012075542001686776
          policy_loss: -0.004514612009127935
          total_loss: 0.9768355170885722
          vf_explained_var: 0.4063052833080292
          vf_loss: 0.9893184224764506
    num_agent_steps_sampled: 1773000
    num_agent_steps_trained: 1773000
    num_steps_sampled: 1773000
    num_steps_trained: 177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1773,45904.5,1773000,-27.679,-22,-76,276.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1774000
  custom_metrics: {}
  date: 2021-10-22_08-31-34
  done: false
  episode_len_mean: 277.16
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.71600000000012
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5562
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2299445745488055e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7922776480515797
          entropy_coeff: 0.009999999999999998
          kl: 0.010479226732491752
          policy_loss: 0.024311590194702148
          total_loss: 0.8184149066607157
          vf_explained_var: 0.4444475769996643
          vf_loss: 0.8020260906881757
    num_agent_steps_sampled: 1774000
    num_agent_steps_trained: 1774000
    num_steps_sampled: 1774000
    num_steps_trained: 17740

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1774,45935.1,1774000,-27.716,-22,-76,277.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1775000
  custom_metrics: {}
  date: 2021-10-22_08-32-01
  done: false
  episode_len_mean: 277.74
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.774000000000125
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5566
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2299445745488055e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.0230511400434705
          entropy_coeff: 0.009999999999999998
          kl: 0.029424082939910078
          policy_loss: 0.01564127008120219
          total_loss: 1.1962046795421177
          vf_explained_var: 0.3248939514160156
          vf_loss: 1.1907939235369365
    num_agent_steps_sampled: 1775000
    num_agent_steps_trained: 1775000
    num_steps_sampled: 1775000
    num_steps_trained: 17750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1775,45962.4,1775000,-27.774,-22,-76,277.74


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1776000
  custom_metrics: {}
  date: 2021-10-22_08-32-29
  done: false
  episode_len_mean: 278.38
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.83800000000012
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5570
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.844916861823208e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.0870920730961693
          entropy_coeff: 0.009999999999999998
          kl: 0.016320492049088545
          policy_loss: 0.014879741892218589
          total_loss: 1.1288093752331203
          vf_explained_var: 0.3432452380657196
          vf_loss: 1.124800553586748
    num_agent_steps_sampled: 1776000
    num_agent_steps_trained: 1776000
    num_steps_sampled: 1776000
    num_steps_trained: 1776000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1776,45990.6,1776000,-27.838,-22,-76,278.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1777000
  custom_metrics: {}
  date: 2021-10-22_08-32-56
  done: false
  episode_len_mean: 279.16
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.916000000000114
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5573
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.844916861823208e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.1092089513937633
          entropy_coeff: 0.009999999999999998
          kl: 0.028646650740785793
          policy_loss: -0.0025539112587769828
          total_loss: 0.8146819949150086
          vf_explained_var: 0.47677481174468994
          vf_loss: 0.8283279983533753
    num_agent_steps_sampled: 1777000
    num_agent_steps_trained: 1777000
    num_steps_sampled: 1777000
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1777,46017.5,1777000,-27.916,-22,-76,279.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1778000
  custom_metrics: {}
  date: 2021-10-22_08-33-23
  done: false
  episode_len_mean: 280.07
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.007000000000122
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5577
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1767375292734814e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.9988862401909299
          entropy_coeff: 0.009999999999999998
          kl: 0.041898270417020295
          policy_loss: -0.042103776997990075
          total_loss: 1.062810183233685
          vf_explained_var: 0.4952637553215027
          vf_loss: 1.1149028440316517
    num_agent_steps_sampled: 1778000
    num_agent_steps_trained: 1778000
    num_steps_sampled: 1778000
    num_steps_trained: 1778

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1778,46044.4,1778000,-28.007,-22,-76,280.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1779000
  custom_metrics: {}
  date: 2021-10-22_08-34-10
  done: false
  episode_len_mean: 279.63
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.963000000000115
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5581
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7651062939102216e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7140209092034234
          entropy_coeff: 0.009999999999999998
          kl: 0.012861566338341113
          policy_loss: 0.052661195480161245
          total_loss: 1.2740660150845846
          vf_explained_var: 0.36788830161094666
          vf_loss: 1.2285450180371602
    num_agent_steps_sampled: 1779000
    num_agent_steps_trained: 1779000
    num_steps_sampled: 1779000
    num_steps_trained: 1779

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1779,46091.4,1779000,-27.963,-21.2,-76,279.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1780000
  custom_metrics: {}
  date: 2021-10-22_08-34-41
  done: false
  episode_len_mean: 278.89
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.889000000000113
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5585
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7651062939102216e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.6924340274598864
          entropy_coeff: 0.009999999999999998
          kl: 0.009507492083226845
          policy_loss: 0.052891138402952086
          total_loss: 1.128157369295756
          vf_explained_var: 0.37781935930252075
          vf_loss: 1.0821905593077341
    num_agent_steps_sampled: 1780000
    num_agent_steps_trained: 1780000
    num_steps_sampled: 1780000
    num_steps_trained: 17800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1780,46122,1780000,-27.889,-21.2,-76,278.89


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1781000
  custom_metrics: {}
  date: 2021-10-22_08-35-11
  done: false
  episode_len_mean: 276.79
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.679000000000116
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5589
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7651062939102216e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7586481829484304
          entropy_coeff: 0.009999999999999998
          kl: 0.010806299763402983
          policy_loss: 0.025233728604184255
          total_loss: 0.9943763110372755
          vf_explained_var: 0.5003501176834106
          vf_loss: 0.9767290585570865
    num_agent_steps_sampled: 1781000
    num_agent_steps_trained: 1781000
    num_steps_sampled: 1781000
    num_steps_trained: 17810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1781,46152.8,1781000,-27.679,-21.2,-76,276.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1782000
  custom_metrics: {}
  date: 2021-10-22_08-35-43
  done: false
  episode_len_mean: 275.13
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.513000000000112
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5593
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7651062939102216e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.6637815872828166
          entropy_coeff: 0.009999999999999998
          kl: 0.05535392384950026
          policy_loss: -0.005295334135492643
          total_loss: 0.9801136473814647
          vf_explained_var: 0.4790244996547699
          vf_loss: 0.9920467982689539
    num_agent_steps_sampled: 1782000
    num_agent_steps_trained: 1782000
    num_steps_sampled: 1782000
    num_steps_trained: 17820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1782,46184,1782000,-27.513,-21.2,-76,275.13


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1783000
  custom_metrics: {}
  date: 2021-10-22_08-36-14
  done: false
  episode_len_mean: 273.97
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.39700000000011
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 5
  episodes_total: 5598
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6476594408653328e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7183603207270305
          entropy_coeff: 0.009999999999999998
          kl: 0.04665632580387356
          policy_loss: -0.006033486872911453
          total_loss: 1.118028653330273
          vf_explained_var: 0.6378614902496338
          vf_loss: 1.131245736281077
    num_agent_steps_sampled: 1783000
    num_agent_steps_trained: 1783000
    num_steps_sampled: 1783000
    num_steps_trained: 1783000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1783,46215.5,1783000,-27.397,-21.2,-76,273.97


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1784000
  custom_metrics: {}
  date: 2021-10-22_08-36-47
  done: false
  episode_len_mean: 273.26
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.326000000000114
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5602
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.971489161297999e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.8357132527563307
          entropy_coeff: 0.009999999999999998
          kl: 0.07955655690410973
          policy_loss: 0.05709340257777108
          total_loss: 0.6966886003812154
          vf_explained_var: 0.7910248637199402
          vf_loss: 0.6479523364040587
    num_agent_steps_sampled: 1784000
    num_agent_steps_trained: 1784000
    num_steps_sampled: 1784000
    num_steps_trained: 1784000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1784,46248.5,1784000,-27.326,-21.2,-76,273.26


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1785000
  custom_metrics: {}
  date: 2021-10-22_08-37-19
  done: false
  episode_len_mean: 273.18
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -27.318000000000115
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 3
  episodes_total: 5605
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.957233741947e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.9497738848129909
          entropy_coeff: 0.009999999999999998
          kl: 0.10885438677598813
          policy_loss: -0.09331202167603704
          total_loss: 0.8611070414384207
          vf_explained_var: 0.6026910543441772
          vf_loss: 0.9639168050554063
    num_agent_steps_sampled: 1785000
    num_agent_steps_trained: 1785000
    num_steps_sampled: 1785000
    num_steps_trained: 1785000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1785,46280.1,1785000,-27.318,-21.2,-76,273.18




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1786000
  custom_metrics: {}
  date: 2021-10-22_08-38-08
  done: false
  episode_len_mean: 271.8
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -27.180000000000106
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 5
  episodes_total: 5610
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.935850612920498e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.7834206461906433
          entropy_coeff: 0.009999999999999998
          kl: 0.07750468321678802
          policy_loss: -0.03665472310450342
          total_loss: 1.00875842736827
          vf_explained_var: 0.6901757717132568
          vf_loss: 1.0532473531034259
    num_agent_steps_sampled: 1786000
    num_agent_steps_trained: 1786000
    num_steps_sampled: 1786000
    num_steps_trained: 1786000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1786,46329,1786000,-27.18,-20.8,-76,271.8


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1787000
  custom_metrics: {}
  date: 2021-10-22_08-38-39
  done: false
  episode_len_mean: 271.29
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -27.129000000000104
  episode_reward_min: -75.99999999999996
  episodes_this_iter: 4
  episodes_total: 5614
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3403775919380745e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.7154140293598175
          entropy_coeff: 0.009999999999999998
          kl: 0.0446310779631795
          policy_loss: 0.039321609545085165
          total_loss: 0.40855024870898987
          vf_explained_var: 0.8725413084030151
          vf_loss: 0.376382781068484
    num_agent_steps_sampled: 1787000
    num_agent_steps_trained: 1787000
    num_steps_sampled: 1787000
    num_steps_trained: 178700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1787,46360.3,1787000,-27.129,-20.8,-76,271.29


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1788000
  custom_metrics: {}
  date: 2021-10-22_08-39-11
  done: false
  episode_len_mean: 262.79
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -26.279000000000106
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5618
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0105663879071118e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.46875594755013783
          entropy_coeff: 0.009999999999999998
          kl: 0.01946555607438035
          policy_loss: 0.038367292823063004
          total_loss: 0.5434622433450487
          vf_explained_var: 0.6716932654380798
          vf_loss: 0.5097825056976742
    num_agent_steps_sampled: 1788000
    num_agent_steps_trained: 1788000
    num_steps_sampled: 1788000
    num_steps_trained: 1788

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1788,46392.7,1788000,-26.279,-20.8,-45.7,262.79


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1789000
  custom_metrics: {}
  date: 2021-10-22_08-39-43
  done: false
  episode_len_mean: 259.85
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -25.9850000000001
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5622
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0105663879071118e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.5846733020411597
          entropy_coeff: 0.009999999999999998
          kl: 0.010054746727290523
          policy_loss: -0.04179387353360653
          total_loss: 0.5504177014032999
          vf_explained_var: 0.480360209941864
          vf_loss: 0.5980583051840465
    num_agent_steps_sampled: 1789000
    num_agent_steps_trained: 1789000
    num_steps_sampled: 1789000
    num_steps_trained: 1789000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1789,46424.5,1789000,-25.985,-20.8,-45.7,259.85


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1790000
  custom_metrics: {}
  date: 2021-10-22_08-40-16
  done: false
  episode_len_mean: 256.46
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -25.646000000000093
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5626
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0105663879071118e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.5536049322949516
          entropy_coeff: 0.009999999999999998
          kl: 0.011694965647483515
          policy_loss: -0.1043497997853491
          total_loss: 0.749373545911577
          vf_explained_var: 0.34486618638038635
          vf_loss: 0.8592593964603212
    num_agent_steps_sampled: 1790000
    num_agent_steps_trained: 1790000
    num_steps_sampled: 1790000
    num_steps_trained: 17900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1790,46457.3,1790000,-25.646,-20.8,-45.7,256.46


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1791000
  custom_metrics: {}
  date: 2021-10-22_08-40-47
  done: false
  episode_len_mean: 254.69
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -25.469000000000097
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5630
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0105663879071118e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.6597308721807268
          entropy_coeff: 0.009999999999999998
          kl: 0.012771128821604805
          policy_loss: -0.11479389783408907
          total_loss: 0.528734760483106
          vf_explained_var: 0.5827903747558594
          vf_loss: 0.6501259727610482
    num_agent_steps_sampled: 1791000
    num_agent_steps_trained: 1791000
    num_steps_sampled: 1791000
    num_steps_trained: 17910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1791,46488,1791000,-25.469,-20.8,-45.7,254.69


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1792000
  custom_metrics: {}
  date: 2021-10-22_08-41-16
  done: false
  episode_len_mean: 255.07
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -25.507000000000094
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5634
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0105663879071118e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.8580425391594569
          entropy_coeff: 0.009999999999999998
          kl: 0.06596676418504582
          policy_loss: 0.05606135626633962
          total_loss: 0.7594132969776789
          vf_explained_var: 0.45829179883003235
          vf_loss: 0.711932365099589
    num_agent_steps_sampled: 1792000
    num_agent_steps_trained: 1792000
    num_steps_sampled: 1792000
    num_steps_trained: 179200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1792,46516.7,1792000,-25.507,-20.8,-45.7,255.07


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1793000
  custom_metrics: {}
  date: 2021-10-22_08-41-49
  done: false
  episode_len_mean: 254.07
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -25.407000000000092
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 5638
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0158495818606675e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.5983103818363613
          entropy_coeff: 0.009999999999999998
          kl: 0.00864873924445517
          policy_loss: -0.06594497503505813
          total_loss: 0.8184106628100077
          vf_explained_var: 0.3982852101325989
          vf_loss: 0.8903387440575494
    num_agent_steps_sampled: 1793000
    num_agent_steps_trained: 1793000
    num_steps_sampled: 1793000
    num_steps_trained: 17930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1793,46550.6,1793000,-25.407,-20.8,-45.7,254.07




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1794000
  custom_metrics: {}
  date: 2021-10-22_08-42-40
  done: false
  episode_len_mean: 252.33
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.23300000000009
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 5
  episodes_total: 5643
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0158495818606675e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.6650696022642983
          entropy_coeff: 0.009999999999999998
          kl: 0.10464434019793947
          policy_loss: -0.017900836281478406
          total_loss: 0.6932599027951558
          vf_explained_var: 0.6079586744308472
          vf_loss: 0.717811440759235
    num_agent_steps_sampled: 1794000
    num_agent_steps_trained: 1794000
    num_steps_sampled: 1794000
    num_steps_trained: 179400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1794,46601.1,1794000,-25.233,-20.1,-45.7,252.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1795000
  custom_metrics: {}
  date: 2021-10-22_08-43-15
  done: false
  episode_len_mean: 249.16
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.916000000000086
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5647
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.523774372791002e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.3389507793717914
          entropy_coeff: 0.009999999999999998
          kl: 0.023603919494485766
          policy_loss: 0.03303166487150722
          total_loss: 0.8418169571293725
          vf_explained_var: 0.4304109215736389
          vf_loss: 0.8121748043431176
    num_agent_steps_sampled: 1795000
    num_agent_steps_trained: 1795000
    num_steps_sampled: 1795000
    num_steps_trained: 17950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1795,46636,1795000,-24.916,-20.1,-32.7,249.16


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1796000
  custom_metrics: {}
  date: 2021-10-22_08-43-51
  done: false
  episode_len_mean: 248.59
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.859000000000087
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5651
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.785661559186506e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.36160001953442894
          entropy_coeff: 0.009999999999999998
          kl: 0.005499356931621454
          policy_loss: -0.07301273917158445
          total_loss: 0.8494674623012543
          vf_explained_var: 0.2614775598049164
          vf_loss: 0.9260962042543623
    num_agent_steps_sampled: 1796000
    num_agent_steps_trained: 1796000
    num_steps_sampled: 1796000
    num_steps_trained: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1796,46671.6,1796000,-24.859,-20.1,-32.7,248.59


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1797000
  custom_metrics: {}
  date: 2021-10-22_08-44-24
  done: false
  episode_len_mean: 246.56
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.656000000000084
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 5656
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.785661559186506e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.36896748708354105
          entropy_coeff: 0.009999999999999998
          kl: 0.019792258506928326
          policy_loss: -0.0004142832424905565
          total_loss: 1.033658191230562
          vf_explained_var: 0.3589640259742737
          vf_loss: 1.037762161095937
    num_agent_steps_sampled: 1797000
    num_agent_steps_trained: 1797000
    num_steps_sampled: 1797000
    num_steps_trained: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1797,46704.9,1797000,-24.656,-20.1,-32.7,246.56


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1798000
  custom_metrics: {}
  date: 2021-10-22_08-44-59
  done: false
  episode_len_mean: 245.51
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.55100000000008
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5660
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.785661559186506e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.26803915401299794
          entropy_coeff: 0.009999999999999998
          kl: 0.004570780310116934
          policy_loss: 0.02686638248463472
          total_loss: 0.6901524292098151
          vf_explained_var: 0.5037327408790588
          vf_loss: 0.6659664279884763
    num_agent_steps_sampled: 1798000
    num_agent_steps_trained: 1798000
    num_steps_sampled: 1798000
    num_steps_trained: 17980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1798,46740.4,1798000,-24.551,-20.1,-32.7,245.51


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1799000
  custom_metrics: {}
  date: 2021-10-22_08-45-35
  done: false
  episode_len_mean: 244.67
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.46700000000007
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5664
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.392830779593253e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.34884798559877606
          entropy_coeff: 0.009999999999999998
          kl: 0.012133694285773957
          policy_loss: 0.00813819310731358
          total_loss: 0.8591806782616509
          vf_explained_var: 0.401904433965683
          vf_loss: 0.8545309669441647
    num_agent_steps_sampled: 1799000
    num_agent_steps_trained: 1799000
    num_steps_sampled: 1799000
    num_steps_trained: 179900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1799,46776.1,1799000,-24.467,-20.1,-32.7,244.67


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1800000
  custom_metrics: {}
  date: 2021-10-22_08-46-10
  done: false
  episode_len_mean: 243.35
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.33500000000008
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5668
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.392830779593253e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.35489438076814017
          entropy_coeff: 0.009999999999999998
          kl: 0.03661123405711078
          policy_loss: -0.032782633023129566
          total_loss: 0.7602790673573812
          vf_explained_var: 0.45237869024276733
          vf_loss: 0.7966106341944801
    num_agent_steps_sampled: 1800000
    num_agent_steps_trained: 1800000
    num_steps_sampled: 1800000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1800,46810.8,1800000,-24.335,-20.1,-32.7,243.35




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1801000
  custom_metrics: {}
  date: 2021-10-22_08-46-58
  done: false
  episode_len_mean: 241.93
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.19300000000007
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5672
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.089246169389879e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.41580123007297515
          entropy_coeff: 0.009999999999999998
          kl: 0.01187438932013865
          policy_loss: -0.08829354147116343
          total_loss: 1.123637619945738
          vf_explained_var: 0.3305807113647461
          vf_loss: 1.2160891671975453
    num_agent_steps_sampled: 1801000
    num_agent_steps_trained: 1801000
    num_steps_sampled: 1801000
    num_steps_trained: 180100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1801,46859.2,1801000,-24.193,-20.1,-32.7,241.93


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1802000
  custom_metrics: {}
  date: 2021-10-22_08-47-33
  done: false
  episode_len_mean: 241.05
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.10500000000008
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5676
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.089246169389879e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.34269044879410004
          entropy_coeff: 0.009999999999999998
          kl: 0.017246905976372143
          policy_loss: -0.08392604175541137
          total_loss: 0.9800911347071329
          vf_explained_var: 0.2720619738101959
          vf_loss: 1.06744408276346
    num_agent_steps_sampled: 1802000
    num_agent_steps_trained: 1802000
    num_steps_sampled: 1802000
    num_steps_trained: 180200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1802,46893.9,1802000,-24.105,-20.1,-32.7,241.05


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1803000
  custom_metrics: {}
  date: 2021-10-22_08-48-07
  done: false
  episode_len_mean: 240.63
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.06300000000007
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 5
  episodes_total: 5681
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.089246169389879e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.35714559422598946
          entropy_coeff: 0.009999999999999998
          kl: 0.031386259066658956
          policy_loss: -0.006631973965300454
          total_loss: 1.178184093369378
          vf_explained_var: 0.3153975307941437
          vf_loss: 1.1883875290552774
    num_agent_steps_sampled: 1803000
    num_agent_steps_trained: 1803000
    num_steps_sampled: 1803000
    num_steps_trained: 1803

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1803,46928.4,1803000,-24.063,-20.1,-32.7,240.63


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1804000
  custom_metrics: {}
  date: 2021-10-22_08-48-41
  done: false
  episode_len_mean: 241.39
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.13900000000007
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 5685
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.633869254084817e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.477285392747985
          entropy_coeff: 0.009999999999999998
          kl: 0.44288125706836623
          policy_loss: 0.026963973914583526
          total_loss: 1.0517229944467545
          vf_explained_var: 0.3672771155834198
          vf_loss: 1.0295318712790806
    num_agent_steps_sampled: 1804000
    num_agent_steps_trained: 1804000
    num_steps_sampled: 1804000
    num_steps_trained: 1804000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1804,46961.5,1804000,-24.139,-20.1,-32.7,241.39


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1805000
  custom_metrics: {}
  date: 2021-10-22_08-49-10
  done: false
  episode_len_mean: 242.78
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.27800000000007
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 5688
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1450803881127225e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.8557931184768677
          entropy_coeff: 0.009999999999999998
          kl: 0.04609943331563468
          policy_loss: 0.05655190762546328
          total_loss: 0.7661401622825199
          vf_explained_var: 0.254783570766449
          vf_loss: 0.7181461811065674
    num_agent_steps_sampled: 1805000
    num_agent_steps_trained: 1805000
    num_steps_sampled: 1805000
    num_steps_trained: 1805000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1805,46990.8,1805000,-24.278,-20.1,-32.7,242.78


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1806000
  custom_metrics: {}
  date: 2021-10-22_08-49-38
  done: false
  episode_len_mean: 244.34
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.43400000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5692
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7176205821690835e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.6486631800731023
          entropy_coeff: 0.009999999999999998
          kl: 0.03390403212243093
          policy_loss: -0.016240239557292726
          total_loss: 1.0137702822685242
          vf_explained_var: 0.26015332341194153
          vf_loss: 1.0364971531762017
    num_agent_steps_sampled: 1806000
    num_agent_steps_trained: 1806000
    num_steps_sampled: 1806000
    num_steps_trained: 1806

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1806,47019.3,1806000,-24.434,-20.1,-34.5,244.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1807000
  custom_metrics: {}
  date: 2021-10-22_08-50-09
  done: false
  episode_len_mean: 245.35
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.53500000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 3
  episodes_total: 5695
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.576430873253626e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.5912690652741326
          entropy_coeff: 0.009999999999999998
          kl: 0.020045894600911475
          policy_loss: -0.008149586617946625
          total_loss: 0.6517084267404344
          vf_explained_var: 0.4556201100349426
          vf_loss: 0.6657707048787012
    num_agent_steps_sampled: 1807000
    num_agent_steps_trained: 1807000
    num_steps_sampled: 1807000
    num_steps_trained: 18070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1807,47050,1807000,-24.535,-20.1,-34.5,245.35


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1808000
  custom_metrics: {}
  date: 2021-10-22_08-50-43
  done: false
  episode_len_mean: 245.67
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.567000000000075
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5699
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8646463098804395e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.4439610550800959
          entropy_coeff: 0.009999999999999998
          kl: 0.026496437636340412
          policy_loss: -0.08259819021655454
          total_loss: 1.0444987171226077
          vf_explained_var: 0.2674744427204132
          vf_loss: 1.1315365188651614
    num_agent_steps_sampled: 1808000
    num_agent_steps_trained: 1808000
    num_steps_sampled: 1808000
    num_steps_trained: 1808

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1808,47084,1808000,-24.567,-20.1,-34.5,245.67




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1809000
  custom_metrics: {}
  date: 2021-10-22_08-51-35
  done: false
  episode_len_mean: 245.33
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.53300000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 5
  episodes_total: 5704
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7969694648206584e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.29774637238846885
          entropy_coeff: 0.009999999999999998
          kl: 0.034181219840264516
          policy_loss: -0.016259019614921676
          total_loss: 0.8362709098392063
          vf_explained_var: 0.4790521264076233
          vf_loss: 0.8555073913600709
    num_agent_steps_sampled: 1809000
    num_agent_steps_trained: 1809000
    num_steps_sampled: 1809000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1809,47135.8,1809000,-24.533,-20.1,-34.5,245.33


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1810000
  custom_metrics: {}
  date: 2021-10-22_08-52-10
  done: false
  episode_len_mean: 244.47
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.44700000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5708
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.695454197230988e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.3702220968074269
          entropy_coeff: 0.009999999999999998
          kl: 0.07446756322711362
          policy_loss: -0.021086666898594963
          total_loss: 0.7783951991134219
          vf_explained_var: 0.4760698080062866
          vf_loss: 0.803184085422092
    num_agent_steps_sampled: 1810000
    num_agent_steps_trained: 1810000
    num_steps_sampled: 1810000
    num_steps_trained: 1810000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1810,47170.9,1810000,-24.447,-20.1,-34.5,244.47


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1811000
  custom_metrics: {}
  date: 2021-10-22_08-52-43
  done: false
  episode_len_mean: 245.0
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.50000000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5712
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.304318129584648e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.47669715914461347
          entropy_coeff: 0.009999999999999998
          kl: 0.03222769358249293
          policy_loss: 0.016431129309866163
          total_loss: 0.5957771789696481
          vf_explained_var: 0.6908987164497375
          vf_loss: 0.5841130206982295
    num_agent_steps_sampled: 1811000
    num_agent_steps_trained: 1811000
    num_steps_sampled: 1811000
    num_steps_trained: 1811000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1811,47204.2,1811000,-24.5,-20.1,-34.5,245


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1812000
  custom_metrics: {}
  date: 2021-10-22_08-53-17
  done: false
  episode_len_mean: 245.6
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.560000000000077
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5716
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9564771943769726e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.4991718176338408
          entropy_coeff: 0.009999999999999998
          kl: 0.03846172665274202
          policy_loss: 0.03520832790268792
          total_loss: 0.5651651208599409
          vf_explained_var: 0.8461235165596008
          vf_loss: 0.5349485102627013
    num_agent_steps_sampled: 1812000
    num_agent_steps_trained: 1812000
    num_steps_sampled: 1812000
    num_steps_trained: 1812000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1812,47237.5,1812000,-24.56,-20.1,-34.5,245.6


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1813000
  custom_metrics: {}
  date: 2021-10-22_08-53-50
  done: false
  episode_len_mean: 246.77
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.67700000000008
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5720
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.934715791565458e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.5840118755896886
          entropy_coeff: 0.009999999999999998
          kl: 0.22574651968504492
          policy_loss: 0.007837676753600439
          total_loss: 0.9690786494149102
          vf_explained_var: 0.49047330021858215
          vf_loss: 0.9670811030599806
    num_agent_steps_sampled: 1813000
    num_agent_steps_trained: 1813000
    num_steps_sampled: 1813000
    num_steps_trained: 181300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1813,47270.2,1813000,-24.677,-20.1,-34.5,246.77


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1814000
  custom_metrics: {}
  date: 2021-10-22_08-54-23
  done: false
  episode_len_mean: 246.83
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.683000000000085
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5724
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.402073687348188e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.5644188106060029
          entropy_coeff: 0.009999999999999998
          kl: 0.02599714113141671
          policy_loss: -0.04719435200095177
          total_loss: 0.7389987713760799
          vf_explained_var: 0.44498202204704285
          vf_loss: 0.7918373088041941
    num_agent_steps_sampled: 1814000
    num_agent_steps_trained: 1814000
    num_steps_sampled: 1814000
    num_steps_trained: 18140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1814,47303.4,1814000,-24.683,-20.1,-34.5,246.83


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1815000
  custom_metrics: {}
  date: 2021-10-22_08-54-58
  done: false
  episode_len_mean: 246.84
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.684000000000083
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 5728
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.603110531022282e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.5633749028046926
          entropy_coeff: 0.009999999999999998
          kl: 0.0404281342989145
          policy_loss: 0.043219729761282605
          total_loss: 0.626352512008614
          vf_explained_var: 0.54789799451828
          vf_loss: 0.5887665298249987
    num_agent_steps_sampled: 1815000
    num_agent_steps_trained: 1815000
    num_steps_sampled: 1815000
    num_steps_trained: 1815000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1815,47338.3,1815000,-24.684,-20.1,-34.5,246.84




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1816000
  custom_metrics: {}
  date: 2021-10-22_08-55-39
  done: false
  episode_len_mean: 246.55
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.655000000000083
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 2
  episodes_total: 5730
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.904665796533421e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.5856000489658779
          entropy_coeff: 0.009999999999999998
          kl: 0.051965246536902834
          policy_loss: 0.0027366132371955446
          total_loss: 0.4918990401344167
          vf_explained_var: 0.4177919924259186
          vf_loss: 0.49501841594982476
    num_agent_steps_sampled: 1816000
    num_agent_steps_trained: 1816000
    num_steps_sampled: 1816000
    num_steps_trained: 181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1816,47379.5,1816000,-24.655,-20.1,-34.5,246.55


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1817000
  custom_metrics: {}
  date: 2021-10-22_08-56-09
  done: false
  episode_len_mean: 250.34
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.034000000000088
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5734
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4856998694800133e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.5383517172601487
          entropy_coeff: 0.009999999999999998
          kl: 0.009696298322983507
          policy_loss: 0.006110608039630784
          total_loss: 1.0169081992573208
          vf_explained_var: 0.4918452799320221
          vf_loss: 1.0161811113357544
    num_agent_steps_sampled: 1817000
    num_agent_steps_trained: 1817000
    num_steps_sampled: 1817000
    num_steps_trained: 1817

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1817,47409.1,1817000,-25.034,-20.1,-66,250.34


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1818000
  custom_metrics: {}
  date: 2021-10-22_08-56-40
  done: false
  episode_len_mean: 251.44
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.144000000000087
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5738
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4856998694800133e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.48035749428802066
          entropy_coeff: 0.009999999999999998
          kl: 0.05093311469269363
          policy_loss: 0.010982728004455567
          total_loss: 1.1017552693684896
          vf_explained_var: 0.27878668904304504
          vf_loss: 1.0955761167738172
    num_agent_steps_sampled: 1818000
    num_agent_steps_trained: 1818000
    num_steps_sampled: 1818000
    num_steps_trained: 181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1818,47440.3,1818000,-25.144,-20.1,-66,251.44


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1819000
  custom_metrics: {}
  date: 2021-10-22_08-57-16
  done: false
  episode_len_mean: 251.87
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.187000000000083
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5742
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2285498042200198e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.3463393145137363
          entropy_coeff: 0.009999999999999998
          kl: 0.00783260656870384
          policy_loss: 0.03729139665762583
          total_loss: 0.9563453012042575
          vf_explained_var: 0.21207697689533234
          vf_loss: 0.9225172999832365
    num_agent_steps_sampled: 1819000
    num_agent_steps_trained: 1819000
    num_steps_sampled: 1819000
    num_steps_trained: 18190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1819,47476.9,1819000,-25.187,-20.2,-66,251.87


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1820000
  custom_metrics: {}
  date: 2021-10-22_08-57-51
  done: false
  episode_len_mean: 252.38
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.23800000000009
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5746
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2285498042200198e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.3790427121851179
          entropy_coeff: 0.009999999999999998
          kl: 0.011162852021898257
          policy_loss: 0.029119142558839586
          total_loss: 1.1296193599700928
          vf_explained_var: 0.24476012587547302
          vf_loss: 1.104290642340978
    num_agent_steps_sampled: 1820000
    num_agent_steps_trained: 1820000
    num_steps_sampled: 1820000
    num_steps_trained: 18200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1820,47511.7,1820000,-25.238,-20.2,-66,252.38


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1821000
  custom_metrics: {}
  date: 2021-10-22_08-58-27
  done: false
  episode_len_mean: 252.5
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.250000000000085
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5750
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2285498042200198e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.26053141438298755
          entropy_coeff: 0.009999999999999998
          kl: 0.02147874190904528
          policy_loss: -0.019828067016270427
          total_loss: 1.1588156521320343
          vf_explained_var: 0.18202461302280426
          vf_loss: 1.181249033080207
    num_agent_steps_sampled: 1821000
    num_agent_steps_trained: 1821000
    num_steps_sampled: 1821000
    num_steps_trained: 1821

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1821,47547.3,1821000,-25.25,-20.2,-66,252.5


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1822000
  custom_metrics: {}
  date: 2021-10-22_08-59-03
  done: false
  episode_len_mean: 252.72
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.272000000000094
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5754
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3428247063300294e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.43970211611853705
          entropy_coeff: 0.009999999999999998
          kl: 0.005565138006250297
          policy_loss: -0.05824611534674962
          total_loss: 1.1784646603796216
          vf_explained_var: 0.24447068572044373
          vf_loss: 1.2411078015963237
    num_agent_steps_sampled: 1822000
    num_agent_steps_trained: 1822000
    num_steps_sampled: 1822000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1822,47583,1822000,-25.272,-20.2,-66,252.72


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1823000
  custom_metrics: {}
  date: 2021-10-22_08-59-34
  done: false
  episode_len_mean: 253.72
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.3720000000001
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5758
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3428247063300294e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.36715132461653815
          entropy_coeff: 0.009999999999999998
          kl: 0.020942783020220505
          policy_loss: -0.019239159756236607
          total_loss: 0.8473695890771018
          vf_explained_var: 0.4801126718521118
          vf_loss: 0.8702802591853671
    num_agent_steps_sampled: 1823000
    num_agent_steps_trained: 1823000
    num_steps_sampled: 1823000
    num_steps_trained: 1823

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1823,47614.9,1823000,-25.372,-20.2,-66,253.72




Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1824000
  custom_metrics: {}
  date: 2021-10-22_09-00-29
  done: false
  episode_len_mean: 254.11
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.41100000000009
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 4
  episodes_total: 5762
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.014237059495043e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.43674585587448544
          entropy_coeff: 0.009999999999999998
          kl: 0.011917428854852434
          policy_loss: -0.05845870218343205
          total_loss: 0.6991290907065074
          vf_explained_var: 0.661958634853363
          vf_loss: 0.7619552473227184
    num_agent_steps_sampled: 1824000
    num_agent_steps_trained: 1824000
    num_steps_sampled: 1824000
    num_steps_trained: 182400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1824,47669.8,1824000,-25.411,-20.2,-66,254.11


Result for PPO_my_env_67041_00000:
  agent_timesteps_total: 1825000
  custom_metrics: {}
  date: 2021-10-22_09-01-07
  done: false
  episode_len_mean: 254.64
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -25.46400000000009
  episode_reward_min: -66.00000000000053
  episodes_this_iter: 5
  episodes_total: 5767
  experiment_id: a63f200b87524df7b0bab0d758dab5c9
  hostname: 97fec18c88ee
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.014237059495043e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.30271101577414405
          entropy_coeff: 0.009999999999999998
          kl: 0.03131255075928214
          policy_loss: -0.07390906603799927
          total_loss: 0.6569002111752827
          vf_explained_var: 0.6959981918334961
          vf_loss: 0.7338363852765825
    num_agent_steps_sampled: 1825000
    num_agent_steps_trained: 1825000
    num_steps_sampled: 1825000
    num_steps_trained: 182500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67041_00000,RUNNING,172.17.0.2:188,1825,47707.1,1825000,-25.464,-20.2,-66,254.64


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ipython-7.25.0-py3.7.egg/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_76/3010183041.py", line 29, in <module>
    loggers=[WandbLogger])
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/tune.py", line 532, in run
    runner.step()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 554, in step
    self._process_events(timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 675, in _process_events
    timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 718, in get_next_available_trial
    ready, _ = ray.wait(shuffled_results, timeout=timeout)
  File "/root/miniconda/envs/py37/lib/python3.7/site-pac

TypeError: object of type 'NoneType' has no len()